@@ -372,6 +372,97 @@ getSomeReg expr = do
372
372
Fixed rep reg code ->
373
373
return (reg, rep, code)
374
374
375
+ {- Note [Aarch64 immediates]
376
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
377
+ Aarch64 with it's fixed width instruction encoding uses leftover space for
378
+ immediates.
379
+ If you want the full rundown consult the arch reference document:
380
+ "Arm® Architecture Reference Manual" - "C3.4 Data processing - immediate"
381
+
382
+ The gist of it is that different instructions allow for different immediate encodings.
383
+ The ones we care about for better code generation are:
384
+
385
+ * Simple but potentially repeated bit-patterns for logic instructions.
386
+ * 16bit numbers shifted by multiples of 16.
387
+ * 12 bit numbers optionally shifted by 12 bits.
388
+
389
+ It might seem like the ISA allows for 64bit immediates but this isn't the case.
390
+ Rather there are some instruction aliases which allow for large unencoded immediates
391
+ which will then be transalted to one of the immediate encodings implicitly.
392
+
393
+ For example mov x1, #0x10000 is allowed but will be assembled to movz x1, #0x1, lsl #16
394
+ -}
395
+
396
+ -- | Move (wide immediate)
397
+ -- Allows for 16bit immediate which can be shifted by 0/16/32/48 bits.
398
+ -- Used with MOVZ,MOVN, MOVK
399
+ -- See Note [Aarch64 immediates]
400
+ getMovWideImm :: Integer -> Width -> Maybe Operand
401
+ getMovWideImm n w
402
+ -- TODO: Handle sign extension/negatives
403
+ | n <= 0
404
+ = Nothing
405
+ -- Fits in 16 bits
406
+ | sized_n < 2 ^ (16 :: Int )
407
+ = Just $ OpImm (ImmInteger truncated)
408
+
409
+ -- 0x0000 0000 xxxx 0000
410
+ | trailing_zeros >= 16 && sized_n < 2 ^ (32 :: Int )
411
+ = Just $ OpImmShift (ImmInteger $ truncated `shiftR` 16 ) SLSL 16
412
+
413
+ -- 0x 0000 xxxx 0000 0000
414
+ | trailing_zeros >= 32 && sized_n < 2 ^ (48 :: Int )
415
+ = Just $ OpImmShift (ImmInteger $ truncated `shiftR` 32 ) SLSL 32
416
+
417
+ -- 0x xxxx 0000 0000 0000
418
+ | trailing_zeros >= 48
419
+ = Just $ OpImmShift (ImmInteger $ truncated `shiftR` 48 ) SLSL 48
420
+
421
+ | otherwise
422
+ = Nothing
423
+ where
424
+ truncated = narrowU w n
425
+ sized_n = fromIntegral truncated :: Word64
426
+ trailing_zeros = countTrailingZeros sized_n
427
+
428
+ -- | Arithmetic(immediate)
429
+ -- Allows for 12bit immediates which can be shifted by 0 or 12 bits.
430
+ -- Used with ADD, ADDS, SUB, SUBS, CMP, CMN
431
+ -- See Note [Aarch64 immediates]
432
+ getArithImm :: Integer -> Width -> Maybe Operand
433
+ getArithImm n w
434
+ -- TODO: Handle sign extension
435
+ | n <= 0
436
+ = Nothing
437
+ -- Fits in 16 bits
438
+ -- Fits in 12 bits
439
+ | sized_n < 2 ^ (12 :: Int )
440
+ = Just $ OpImm (ImmInteger truncated)
441
+
442
+ -- 12 bits shifted by 12 places.
443
+ | trailing_zeros >= 12 && sized_n < 2 ^ (24 :: Int )
444
+ = Just $ OpImmShift (ImmInteger $ truncated `shiftR` 12 ) SLSL 12
445
+
446
+ | otherwise
447
+ = Nothing
448
+ where
449
+ sized_n = fromIntegral truncated :: Word64
450
+ truncated = narrowU w n
451
+ trailing_zeros = countTrailingZeros sized_n
452
+
453
+ -- | Logical (immediate)
454
+ -- Allows encoding of some repeated bitpatterns
455
+ -- Used with AND, ANDS, EOR, ORR, TST
456
+ -- and their aliases which includes at least MOV (bitmask immediate)
457
+ -- See Note [Aarch64 immediates]
458
+ getBitmaskImm :: Integer -> Width -> Maybe Operand
459
+ getBitmaskImm n w
460
+ | isAArch64Bitmask truncated = Just $ OpImm (ImmInteger truncated)
461
+ | otherwise = Nothing
462
+ where
463
+ truncated = narrowU w n
464
+
465
+
375
466
-- TODO OPT: we might be able give getRegister
376
467
-- a hint, what kind of register we want.
377
468
getFloatReg :: HasCallStack => CmmExpr -> NatM (Reg , Format , InstrBlock )
@@ -494,8 +585,14 @@ getRegister' config plat expr
494
585
CmmLit lit
495
586
-> case lit of
496
587
497
- -- TODO handle CmmInt 0 specially, use wzr or xzr.
498
-
588
+ -- Use wzr xzr for CmmInt 0 if the width matches up, otherwise do a move.
589
+ -- TODO: Reenable after https://gitlab.haskell.org/ghc/ghc/-/issues/23632 is fixed.
590
+ -- CmmInt 0 W32 -> do
591
+ -- let format = intFormat W32
592
+ -- return (Fixed format reg_zero (unitOL $ (COMMENT ((text . show $ expr))) ))
593
+ -- CmmInt 0 W64 -> do
594
+ -- let format = intFormat W64
595
+ -- return (Fixed format reg_zero (unitOL $ (COMMENT ((text . show $ expr))) ))
499
596
CmmInt i W8 | i >= 0 -> do
500
597
return (Any (intFormat W8 ) (\ dst -> unitOL $ annExpr expr (MOV (OpReg W8 dst) (OpImm (ImmInteger (narrowU W8 i))))))
501
598
CmmInt i W16 | i >= 0 -> do
@@ -510,8 +607,13 @@ getRegister' config plat expr
510
607
-- Those need the upper bits set. We'd either have to explicitly sign
511
608
-- or figure out something smarter. Lowered to
512
609
-- `MOV dst XZR`
610
+ CmmInt i w | i >= 0
611
+ , Just imm_op <- getMovWideImm i w -> do
612
+ return (Any (intFormat w) (\ dst -> unitOL $ annExpr expr (MOVZ (OpReg w dst) imm_op)))
613
+
513
614
CmmInt i w | isNbitEncodeable 16 i, i >= 0 -> do
514
615
return (Any (intFormat w) (\ dst -> unitOL $ annExpr expr (MOV (OpReg W16 dst) (OpImm (ImmInteger i)))))
616
+
515
617
CmmInt i w | isNbitEncodeable 32 i, i >= 0 -> do
516
618
let half0 = fromIntegral (fromIntegral i :: Word16 )
517
619
half1 = fromIntegral (fromIntegral (i `shiftR` 16 ) :: Word16 )
@@ -586,7 +688,6 @@ getRegister' config plat expr
586
688
(op, imm_code) <- litToImm' lit
587
689
let rep = cmmLitType plat lit
588
690
format = cmmTypeFormat rep
589
- -- width = typeWidth rep
590
691
return (Any format (\ dst -> imm_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op))
591
692
592
693
CmmLabelOff lbl off -> do
@@ -791,17 +892,51 @@ getRegister' config plat expr
791
892
-- withTempFloatReg w op = OpReg w <$> getNewRegNat (floatFormat w) >>= op
792
893
793
894
-- A "plain" operation.
794
- bitOp w op = do
895
+ bitOpImm w op encode_imm = do
795
896
-- compute x<m> <- x
796
897
-- compute x<o> <- y
797
898
-- <OP> x<n>, x<m>, x<o>
798
899
(reg_x, format_x, code_x) <- getSomeReg x
799
- (reg_y, format_y, code_y) <- getSomeReg y
800
- massertPpr (isIntFormat format_x == isIntFormat format_y) $ text " bitOp: incompatible"
900
+ (op_y, format_y, code_y) <- case y of
901
+ CmmLit (CmmInt n w)
902
+ | Just imm_operand_y <- encode_imm n w
903
+ -> return (imm_operand_y, intFormat w, nilOL)
904
+ _ -> do
905
+ (reg_y, format_y, code_y) <- getSomeReg y
906
+ return (OpReg w reg_y, format_y, code_y)
907
+ massertPpr (isIntFormat format_x == isIntFormat format_y) $ text " bitOpImm: incompatible"
801
908
return $ Any (intFormat w) (\ dst ->
802
909
code_x `appOL`
803
910
code_y `appOL`
804
- op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
911
+ op (OpReg w dst) (OpReg w reg_x) op_y)
912
+
913
+ -- A (potentially signed) integer operation.
914
+ -- In the case of 8- and 16-bit signed arithmetic we must first
915
+ -- sign-extend both arguments to 32-bits.
916
+ -- See Note [Signed arithmetic on AArch64].
917
+ intOpImm :: Bool -> Width -> (Operand -> Operand -> Operand -> OrdList Instr ) -> (Integer -> Width -> Maybe Operand ) -> NatM (Register )
918
+ intOpImm {- is signed -} True w op _encode_imm = intOp True w op
919
+ intOpImm False w op encode_imm = do
920
+ -- compute x<m> <- x
921
+ -- compute x<o> <- y
922
+ -- <OP> x<n>, x<m>, x<o>
923
+ (reg_x, format_x, code_x) <- getSomeReg x
924
+ (op_y, format_y, code_y) <- case y of
925
+ CmmLit (CmmInt n w)
926
+ | Just imm_operand_y <- encode_imm n w
927
+ -> return (imm_operand_y, intFormat w, nilOL)
928
+ _ -> do
929
+ (reg_y, format_y, code_y) <- getSomeReg y
930
+ return (OpReg w reg_y, format_y, code_y)
931
+ massertPpr (isIntFormat format_x && isIntFormat format_y) $ text " intOp: non-int"
932
+ -- This is the width of the registers on which the operation
933
+ -- should be performed.
934
+ let w' = opRegWidth w
935
+ return $ Any (intFormat w) $ \ dst ->
936
+ code_x `appOL`
937
+ code_y `appOL`
938
+ op (OpReg w' dst) (OpReg w' reg_x) (op_y) `appOL`
939
+ truncateReg w' w dst -- truncate back to the operand's original width
805
940
806
941
-- A (potentially signed) integer operation.
807
942
-- In the case of 8- and 16-bit signed arithmetic we must first
@@ -847,9 +982,9 @@ getRegister' config plat expr
847
982
case op of
848
983
-- Integer operations
849
984
-- Add/Sub should only be Integer Options.
850
- MO_Add w -> intOp False w (\ d x y -> unitOL $ annExpr expr (ADD d x y))
985
+ MO_Add w -> intOpImm False w (\ d x y -> unitOL $ annExpr expr (ADD d x y)) getArithImm
851
986
-- TODO: Handle sub-word case
852
- MO_Sub w -> intOp False w (\ d x y -> unitOL $ annExpr expr (SUB d x y))
987
+ MO_Sub w -> intOpImm False w (\ d x y -> unitOL $ annExpr expr (SUB d x y)) getArithImm
853
988
854
989
-- Note [CSET]
855
990
-- ~~~~~~~~~~~
@@ -891,8 +1026,8 @@ getRegister' config plat expr
891
1026
892
1027
-- N.B. We needn't sign-extend sub-word size (in)equality comparisons
893
1028
-- since we don't care about ordering.
894
- MO_Eq w -> bitOp w (\ d x y -> toOL [ CMP x y, CSET d EQ ])
895
- MO_Ne w -> bitOp w (\ d x y -> toOL [ CMP x y, CSET d NE ])
1029
+ MO_Eq w -> bitOpImm w (\ d x y -> toOL [ CMP x y, CSET d EQ ]) getArithImm
1030
+ MO_Ne w -> bitOpImm w (\ d x y -> toOL [ CMP x y, CSET d NE ]) getArithImm
896
1031
897
1032
-- Signed multiply/divide
898
1033
MO_Mul w -> intOp True w (\ d x y -> unitOL $ MUL d x y)
@@ -921,10 +1056,10 @@ getRegister' config plat expr
921
1056
MO_S_Lt w -> intOp True w (\ d x y -> toOL [ CMP x y, CSET d SLT ])
922
1057
923
1058
-- Unsigned comparisons
924
- MO_U_Ge w -> intOp False w (\ d x y -> toOL [ CMP x y, CSET d UGE ])
925
- MO_U_Le w -> intOp False w (\ d x y -> toOL [ CMP x y, CSET d ULE ])
926
- MO_U_Gt w -> intOp False w (\ d x y -> toOL [ CMP x y, CSET d UGT ])
927
- MO_U_Lt w -> intOp False w (\ d x y -> toOL [ CMP x y, CSET d ULT ])
1059
+ MO_U_Ge w -> intOpImm False w (\ d x y -> toOL [ CMP x y, CSET d UGE ]) getArithImm
1060
+ MO_U_Le w -> intOpImm False w (\ d x y -> toOL [ CMP x y, CSET d ULE ]) getArithImm
1061
+ MO_U_Gt w -> intOpImm False w (\ d x y -> toOL [ CMP x y, CSET d UGT ]) getArithImm
1062
+ MO_U_Lt w -> intOpImm False w (\ d x y -> toOL [ CMP x y, CSET d ULT ]) getArithImm
928
1063
929
1064
-- Floating point arithmetic
930
1065
MO_F_Add w -> floatOp w (\ d x y -> unitOL $ ADD d x y)
@@ -947,9 +1082,9 @@ getRegister' config plat expr
947
1082
MO_F_Lt w -> floatCond w (\ d x y -> toOL [ CMP x y, CSET d OLT ]) -- x < y <=> y >= x
948
1083
949
1084
-- Bitwise operations
950
- MO_And w -> bitOp w (\ d x y -> unitOL $ AND d x y)
951
- MO_Or w -> bitOp w (\ d x y -> unitOL $ ORR d x y)
952
- MO_Xor w -> bitOp w (\ d x y -> unitOL $ EOR d x y)
1085
+ MO_And w -> bitOpImm w (\ d x y -> unitOL $ AND d x y) getBitmaskImm
1086
+ MO_Or w -> bitOpImm w (\ d x y -> unitOL $ ORR d x y) getBitmaskImm
1087
+ MO_Xor w -> bitOpImm w (\ d x y -> unitOL $ EOR d x y) getBitmaskImm
953
1088
MO_Shl w -> intOp False w (\ d x y -> unitOL $ LSL d x y)
954
1089
MO_U_Shr w -> intOp False w (\ d x y -> unitOL $ LSR d x y)
955
1090
MO_S_Shr w -> intOp True w (\ d x y -> unitOL $ ASR d x y)
@@ -999,7 +1134,7 @@ getRegister' config plat expr
999
1134
1000
1135
where
1001
1136
isNbitEncodeable :: Int -> Integer -> Bool
1002
- isNbitEncodeable n i = let shift = n - 1 in (- 1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
1137
+ isNbitEncodeable n_bits i = let shift = n_bits - 1 in (- 1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
1003
1138
1004
1139
-- N.B. MUL does not set the overflow flag.
1005
1140
do_mul_may_oflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
0 commit comments