@@ -3895,6 +3895,21 @@ def _float_min_max(expr):
# Lane-wise '*' handlers for 32- and 64-bit elements. Each lane result is
# post-processed by _keep_mul_high with signed=True (presumably keeping the
# high half of the signed product -- confirm against _keep_mul_high).
pmulhd = vec_vertical_instr(
    '*', 32, lambda lane_expr: _keep_mul_high(lane_expr, signed=True)
)
pmulhq = vec_vertical_instr(
    '*', 64, lambda lane_expr: _keep_mul_high(lane_expr, signed=True)
)
38973897
def pmuldq(ir, instr, dst, src):
    """Semantic of PMULDQ for 128-bit operands.

    The 32-bit lanes at bits [0:32] and [64:96] of @dst and @src are
    sign-extended to 64 bits and multiplied; the two 64-bit products are
    written to the low and high quadword of @dst respectively.

    Raises RuntimeError when @dst is not 128 bits wide.
    Returns the usual (assignments, extra IR blocks) pair.
    """
    if dst.size != 128:
        raise RuntimeError("Unsupported size %d" % dst.size)

    # Even dword lanes only: lane 0 feeds the low qword, lane 2 the high one
    low_product = src[:32].signExtend(64) * dst[:32].signExtend(64)
    high_product = src[64:96].signExtend(64) * dst[64:96].signExtend(64)

    assignments = [
        m2_expr.ExprAssign(dst[:64], low_product),
        m2_expr.ExprAssign(dst[64:], high_product),
    ]
    return assignments, []
3912+
38983913def pmuludq (ir , instr , dst , src ):
38993914 e = []
39003915 if dst .size == 64 :
@@ -3976,9 +3991,11 @@ def _average(expr):
# Lane-wise minimum handlers; the `signed` keyword handed to _min_max picks
# the unsigned (False) or signed (True) variant for each lane width.
pminub = vec_vertical_instr(
    'min', 8, lambda lane_expr: _min_max(lane_expr, signed=False)
)
pminuw = vec_vertical_instr(
    'min', 16, lambda lane_expr: _min_max(lane_expr, signed=False)
)
pminud = vec_vertical_instr(
    'min', 32, lambda lane_expr: _min_max(lane_expr, signed=False)
)
pminsd = vec_vertical_instr(
    'min', 32, lambda lane_expr: _min_max(lane_expr, signed=True)
)

# Lane-wise maximum handlers, same conventions as the minimum ones above.
pmaxub = vec_vertical_instr(
    'max', 8, lambda lane_expr: _min_max(lane_expr, signed=False)
)
pmaxuw = vec_vertical_instr(
    'max', 16, lambda lane_expr: _min_max(lane_expr, signed=False)
)
pmaxud = vec_vertical_instr(
    'max', 32, lambda lane_expr: _min_max(lane_expr, signed=False)
)
pmaxsd = vec_vertical_instr(
    'max', 32, lambda lane_expr: _min_max(lane_expr, signed=True)
)
pmaxsw = vec_vertical_instr(
    'max', 16, lambda lane_expr: _min_max(lane_expr, signed=True)
)
39834000
39844001# Floating-point arithmetic
@@ -4417,6 +4434,36 @@ def pshufhw(_, instr, dst, src, imm):
44174434 out .append (src [shift + 64 : shift + 16 + 64 ])
44184435 return [m2_expr .ExprAssign (dst , m2_expr .ExprCompose (* out ))], []
44194436
def ptest(_, instr, dst, src):
    """Semantic of PTEST: only flags are written, @dst is left untouched.

    zf receives FLAG_EQ(dst & src) and cf receives FLAG_EQ(src & ~dst);
    of, af, pf and nf are all forced to 0.

    Returns the usual (assignments, extra IR blocks) pair.
    """
    cleared = m2_expr.ExprInt(0, 1)
    flag_updates = (
        (zf, m2_expr.ExprOp('FLAG_EQ', dst & src)),
        (cf, m2_expr.ExprOp('FLAG_EQ', src & ~dst)),
        (of, cleared),
        (af, cleared),
        (pf, cleared),
        (nf, cleared),
    )
    return [
        m2_expr.ExprAssign(flag, value) for flag, value in flag_updates
    ], []
4446+
def _clmul64_to_128(a64, b64):
    """Build the 128-bit carry-less product of two 64-bit expressions.

    For every bit i of @b64, the zero-extended @a64 shifted left by i is
    XOR-ed into the accumulator when that bit is set (a zero term is XOR-ed
    otherwise). The accumulated expression is returned after going through
    expr_simp.
    """
    assert a64.size == 64
    assert b64.size == 64

    zero = m2_expr.ExprInt(0, 128)
    widened = a64.zeroExtend(128)

    # One conditional partial product per bit of b64, lowest bit first
    partials = [
        m2_expr.ExprCond(
            b64[pos:pos + 1],
            widened << m2_expr.ExprInt(pos, 128),
            zero
        )
        for pos in range(64)
    ]

    # Left fold with XOR, starting from 0, to keep the same nesting order
    acc = zero
    for partial in partials:
        acc = m2_expr.ExprOp('^', acc, partial)

    return expr_simp(acc)
4460+
def pclmulqdq(_, instr, dst, src, imm8):
    """Semantic of PCLMULQDQ.

    Bit 0 of @imm8 selects the high (set) or low (clear) quadword of @dst,
    bit 4 does the same for @src. The carry-less product of the two selected
    quadwords, as built by _clmul64_to_128, is assigned to @dst.

    Returns the usual (assignments, extra IR blocks) pair.
    """
    selector = int(imm8)

    if selector & 0x01:
        first = dst[64:128]
    else:
        first = dst[:64]

    if selector & 0x10:
        second = src[64:128]
    else:
        second = src[:64]

    product = _clmul64_to_128(first, second)
    return [m2_expr.ExprAssign(dst, product)], []
44204467
44214468def ps_rl_ll (ir , instr , dst , src , op , size ):
44224469 mask = {16 : 0xF ,
@@ -4594,7 +4641,6 @@ def punpckldq(ir, instr, dst, src):
def punpcklqdq(ir, instr, dst, src):
    """Semantic of PUNPCKLQDQ: delegate to punpck with 64-bit elements.

    The trailing 0 is forwarded unchanged as punpck's last argument
    (presumably selecting the low half -- confirm against punpck).
    """
    element_size = 64
    offset = 0
    return punpck(ir, instr, dst, src, element_size, offset)
45964643
4597-
45984644def pinsr (_ , instr , dst , src , imm , size ):
45994645 e = []
46004646
@@ -4682,6 +4728,52 @@ def unpcklpd(_, instr, dst, src):
46824728 e .append (m2_expr .ExprAssign (dst , src ))
46834729 return e , []
46844730
def pmovsxwd(ir, instr, dst, src):
    """Semantic of PMOVSXWD for a 128-bit destination.

    The four lowest 16-bit words of @src are each sign-extended to 32 bits
    and packed, lowest word first, into @dst.

    Raises RuntimeError when @dst is not 128 bits wide, matching the guard
    used by the other pmovsx* handlers (the four 32-bit lanes built here
    only ever form a 128-bit value).
    Returns the usual (assignments, extra IR blocks) pair.
    """
    if dst.size != 128:
        raise RuntimeError("Unsupported size %d" % dst.size)

    lanes = [
        src[16 * i:16 * (i + 1)].signExtend(32)
        for i in range(4)
    ]
    return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*lanes))], []
4737+
def pmovsxwq(ir, instr, dst, src):
    """Semantic of PMOVSXWQ for a 128-bit destination.

    The two lowest 16-bit words of @src are each sign-extended to 64 bits
    and packed, lowest word first, into @dst.

    Raises RuntimeError when @dst is not 128 bits wide.
    Returns the usual (assignments, extra IR blocks) pair.
    """
    if dst.size != 128:
        raise RuntimeError("Unsupported size %d" % dst.size)

    widened = [
        src[16 * idx:16 * (idx + 1)].signExtend(64)
        for idx in range(2)
    ]
    return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*widened))], []
4750+
def pmovmskb(ir, instr, dst, src):
    """Semantic of PMOVMSKB: gather the top bit of every byte of @src.

    Bit i of the mask is bit 7 of byte i of @src, so the mask is
    src.size // 8 bits wide. The mask is zero-extended to the width of
    @dst before being assigned.

    Returns the usual (assignments, extra IR blocks) pair.
    """
    # Slice [8*i + 7 : 8*i + 8] is the most significant bit of byte i
    sign_bits = [
        src[8 * i + 7:8 * (i + 1)]
        for i in range(src.size // 8)
    ]
    mask = m2_expr.ExprCompose(*sign_bits)
    return [m2_expr.ExprAssign(dst, mask.zeroExtend(dst.size))], []
4755+
def pmovsxbd(ir, instr, dst, src):
    """Semantic of PMOVSXBD for a 128-bit destination.

    The four lowest bytes of @src are each sign-extended to 32 bits and
    packed, lowest byte first, into @dst.

    Raises RuntimeError when @dst is not 128 bits wide.
    Returns the usual (assignments, extra IR blocks) pair.
    """
    if dst.size != 128:
        raise RuntimeError("Unsupported size %d" % dst.size)

    widened = [
        src[8 * idx:8 * (idx + 1)].signExtend(32)
        for idx in range(4)
    ]
    return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*widened))], []
4766+
def pmovsxdq(_, instr, dst, src):
    """Semantic of PMOVSXDQ for a 128-bit destination.

    The two lowest 32-bit doublewords of @src are each sign-extended to
    64 bits and packed, lowest doubleword first, into @dst.

    Raises RuntimeError when @dst is not 128 bits wide.
    Returns the usual (assignments, extra IR blocks) pair.
    """
    if dst.size != 128:
        raise RuntimeError("Unsupported size %d" % dst.size)

    widened = [
        src[32 * idx:32 * (idx + 1)].signExtend(64)
        for idx in range(2)
    ]
    return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*widened))], []
46854777
46864778def movlpd (_ , instr , dst , src ):
46874779 e = []
@@ -4805,6 +4897,36 @@ def palignr(ir, instr, dst, src, imm):
48054897
48064898 return [m2_expr .ExprAssign (dst , result )], []
48074899
def psign(ir, instr, dst, src, lane_size):
    """Shared semantic of the PSIGNB/W/D family.

    @dst and @src are split into lanes of @lane_size bits. For each lane,
    the result is:
      - the two's complement negation of the @dst lane when the msb of the
        matching @src lane is set,
      - the @dst lane unchanged when the @src lane is otherwise non-zero,
      - 0 when the @src lane is zero.

    Raises RuntimeError unless both operands are 64 or 128 bits wide and of
    equal size.
    Returns the usual (assignments, extra IR blocks) pair.
    """
    if not (dst.size in [64, 128] and src.size == dst.size):
        raise RuntimeError("Unsupported size dst=%d src=%d" % (dst.size, src.size))

    lanes = []
    for offset in range(0, dst.size, lane_size):
        value = dst[offset:offset + lane_size]
        selector = src[offset:offset + lane_size]

        # -value written as (~value) + 1; XOR with the all-ones mask flips bits
        negated = (value ^ value.mask) + m2_expr.ExprInt(1, value.size)

        # Non-negative selector: keep the value if non-zero, else clear it
        keep_or_clear = m2_expr.ExprCond(
            selector,
            value,
            m2_expr.ExprInt(0, value.size)
        )
        lanes.append(m2_expr.ExprCond(selector.msb(), negated, keep_or_clear))

    return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*lanes))], []
4921+
def psignb(ir, instr, dst, src):
    """Semantic of PSIGNB: psign applied on 8-bit lanes."""
    lane_bits = 8
    return psign(ir, instr, dst, src, lane_bits)
4924+
def psignw(ir, instr, dst, src):
    """Semantic of PSIGNW: psign applied on 16-bit lanes."""
    lane_bits = 16
    return psign(ir, instr, dst, src, lane_bits)
4927+
def psignd(ir, instr, dst, src):
    """Semantic of PSIGND: psign applied on 32-bit lanes."""
    lane_bits = 32
    return psign(ir, instr, dst, src, lane_bits)
48084930
48094931def _signed_to_signed_saturation (expr , dst_size ):
48104932 """Saturate the expr @expr for @dst_size bit
@@ -4873,7 +4995,21 @@ def _signed_to_unsigned_saturation(expr, dst_size):
48734995 )
48744996 )
48754997
def phminposuw(ir, instr, dst, src):
    """Semantic of PHMINPOSUW on 128-bit operands.

    Scans the eight 16-bit words of @src from lowest to highest, tracking
    the smallest word (unsigned comparison) and its position. Because the
    comparison is strict, the first occurrence wins on ties. @dst receives
    the minimum in bits [0:16], its index in bits [16:32] and zeroes in the
    remaining 96 bits.

    Raises RuntimeError unless both operands are 128 bits wide.
    Returns the usual (assignments, extra IR blocks) pair.
    """
    if not (dst.size == 128 and src.size == 128):
        raise RuntimeError("Unsupported size dst=%d src=%d" % (dst.size, src.size))

    best_word = src[:16]
    best_pos = m2_expr.ExprInt(0, 16)

    for pos in range(1, 8):
        candidate = src[pos * 16:(pos + 1) * 16]
        is_smaller = m2_expr.expr_is_unsigned_lower(candidate, best_word)
        best_word = m2_expr.ExprCond(is_smaller, candidate, best_word)
        best_pos = m2_expr.ExprCond(
            is_smaller,
            m2_expr.ExprInt(pos, 16),
            best_pos
        )

    packed = m2_expr.ExprCompose(
        best_word,
        best_pos,
        m2_expr.ExprInt(0, 96)
    )
    return [m2_expr.ExprAssign(dst, packed)], []
48775013
48785014def packsswb (ir , instr , dst , src ):
48795015 out = []
@@ -4898,6 +5034,12 @@ def packuswb(ir, instr, dst, src):
48985034 out .append (_signed_to_unsigned_saturation (source [start :start + 16 ], 8 ))
48995035 return [m2_expr .ExprAssign (dst , m2_expr .ExprCompose (* out ))], []
49005036
def packusdw(ir, instr, dst, src):
    """Semantic of PACKUSDW.

    Every 32-bit lane of @dst, then every 32-bit lane of @src (both walked
    over dst.size bits), is passed through _signed_to_unsigned_saturation
    with a 16-bit target; the resulting words are packed in that order
    into @dst.

    Returns the usual (assignments, extra IR blocks) pair.
    """
    packed_words = [
        _signed_to_unsigned_saturation(operand[offset:offset + 32], 16)
        for operand in (dst, src)
        for offset in range(0, dst.size, 32)
    ]
    return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*packed_words))], []
49015043
49025044def _saturation_sub_unsigned (expr ):
49035045 assert expr .is_op ("+" ) and len (expr .args ) == 2 and expr .args [- 1 ].is_op ("-" )
@@ -5533,6 +5675,7 @@ def fxrstor(_ir, _instr, _dst):
55335675 "pmulhw" : pmulhw ,
55345676 "pmulhd" : pmulhd ,
55355677 "pmulhq" : pmulhq ,
5678+ "pmuldq" : pmuldq ,
55365679 "pmuludq" : pmuludq ,
55375680
55385681 # Mix
@@ -5633,6 +5776,9 @@ def fxrstor(_ir, _instr, _dst):
56335776 "pshufd" : pshufd ,
56345777 "pshuflw" : pshuflw ,
56355778 "pshufhw" : pshufhw ,
5779+ "ptest" : ptest ,
5780+ "ptest" : ptest ,
5781+ "pclmulqdq" : pclmulqdq ,
56365782
56375783 "psrlw" : psrlw ,
56385784 "psrld" : psrld ,
@@ -5647,14 +5793,20 @@ def fxrstor(_ir, _instr, _dst):
56475793
56485794 "palignr" : palignr ,
56495795
5796+ "psignb" : psignb ,
5797+ "psignw" : psignw ,
5798+ "psignd" : psignd ,
5799+
56505800 "pmaxub" : pmaxub ,
56515801 "pmaxuw" : pmaxuw ,
56525802 "pmaxud" : pmaxud ,
5803+ "pmaxsd" : pmaxsd ,
56535804 "pmaxsw" : pmaxsw ,
56545805
56555806 "pminub" : pminub ,
56565807 "pminuw" : pminuw ,
56575808 "pminud" : pminud ,
5809+ "pminsd" : pminsd ,
56585810
56595811 "pcmpeqb" : pcmpeqb ,
56605812 "pcmpeqw" : pcmpeqw ,
@@ -5707,10 +5859,17 @@ def fxrstor(_ir, _instr, _dst):
57075859 "sqrtss" : sqrtss ,
57085860
57095861 "pmovmskb" : pmovmskb ,
5862+ "pmovsxwd" : pmovsxwd ,
5863+ "pmovsxwq" : pmovsxwq ,
5864+ "pmovsxbd" : pmovsxbd ,
5865+ "pmovsxdq" : pmovsxdq ,
5866+
5867+ "phminposuw" : phminposuw ,
57105868
57115869 "packsswb" : packsswb ,
57125870 "packssdw" : packssdw ,
57135871 "packuswb" : packuswb ,
5872+ "packusdw" : packusdw ,
57145873
57155874 "psubusb" : psubusb ,
57165875 "psubusw" : psubusw ,
0 commit comments