Skip to content

Commit 090c800

Browse files
recursion-ninjabgamari
authored andcommitted
Resolving issues #20645 and #26109
Correctly sign extending and casting smaller bit width types for LLVM operations: - bitReverse8# - bitReverse16# - bitReverse32# - byteSwap16# - byteSwap32# - pdep8# - pdep16# - pext8# - pext16# (cherry picked from commit 706d33e)
1 parent f080dec commit 090c800

File tree

13 files changed

+200
-100
lines changed

13 files changed

+200
-100
lines changed

compiler/GHC/Builtin/primops.txt.pp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@
148148
vector = []
149149
deprecated_msg = {} -- A non-empty message indicates deprecation
150150
div_like = False -- Second argument expected to be non zero - used for tests
151+
defined_bits = Nothing -- The number of bits the operation is defined for (if not all bits)
151152

152153
-- Note [When do out-of-line primops go in primops.txt.pp]
153154
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1065,19 +1066,24 @@
10651066

10661067
primop BSwap16Op "byteSwap16#" GenPrimOp Word# -> Word#
10671068
{Swap bytes in the lower 16 bits of a word. The higher bytes are undefined. }
1069+
with defined_bits = 16
10681070
primop BSwap32Op "byteSwap32#" GenPrimOp Word# -> Word#
10691071
{Swap bytes in the lower 32 bits of a word. The higher bytes are undefined. }
1072+
with defined_bits = 32
10701073
primop BSwap64Op "byteSwap64#" GenPrimOp Word64# -> Word64#
10711074
{Swap bytes in a 64 bits of a word.}
10721075
primop BSwapOp "byteSwap#" GenPrimOp Word# -> Word#
10731076
{Swap bytes in a word.}
10741077

10751078
primop BRev8Op "bitReverse8#" GenPrimOp Word# -> Word#
10761079
{Reverse the order of the bits in a 8-bit word.}
1080+
with defined_bits = 8
10771081
primop BRev16Op "bitReverse16#" GenPrimOp Word# -> Word#
10781082
{Reverse the order of the bits in a 16-bit word.}
1083+
with defined_bits = 16
10791084
primop BRev32Op "bitReverse32#" GenPrimOp Word# -> Word#
10801085
{Reverse the order of the bits in a 32-bit word.}
1086+
with defined_bits = 32
10811087
primop BRev64Op "bitReverse64#" GenPrimOp Word64# -> Word64#
10821088
{Reverse the order of the bits in a 64-bit word.}
10831089
primop BRevOp "bitReverse#" GenPrimOp Word# -> Word#

compiler/GHC/CmmToLlvm/CodeGen.hs

Lines changed: 74 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -230,23 +230,22 @@ genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
230230
statement $ Expr $ Call StdCall fptr (argVars' ++ argSuffix) []
231231
| otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt)
232232

233-
-- Handle PopCnt, Clz, Ctz, and BSwap that need to only convert arg
234-
-- and return types
235-
genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
236-
genCallSimpleCast w t dsts args
237-
238-
genCall t@(PrimTarget (MO_Pdep w)) dsts args =
239-
genCallSimpleCast2 w t dsts args
240-
genCall t@(PrimTarget (MO_Pext w)) dsts args =
241-
genCallSimpleCast2 w t dsts args
242-
genCall t@(PrimTarget (MO_Clz w)) dsts args =
243-
genCallSimpleCast w t dsts args
244-
genCall t@(PrimTarget (MO_Ctz w)) dsts args =
245-
genCallSimpleCast w t dsts args
246-
genCall t@(PrimTarget (MO_BSwap w)) dsts args =
247-
genCallSimpleCast w t dsts args
248-
genCall t@(PrimTarget (MO_BRev w)) dsts args =
249-
genCallSimpleCast w t dsts args
233+
-- Handle Clz, Ctz, BRev, BSwap, Pdep, Pext, and PopCnt that need to only
234+
-- convert arg and return types
235+
genCall (PrimTarget op@(MO_Clz w)) [dst] args =
236+
genCallSimpleCast w op dst args
237+
genCall (PrimTarget op@(MO_Ctz w)) [dst] args =
238+
genCallSimpleCast w op dst args
239+
genCall (PrimTarget op@(MO_BRev w)) [dst] args =
240+
genCallSimpleCast w op dst args
241+
genCall (PrimTarget op@(MO_BSwap w)) [dst] args =
242+
genCallSimpleCast w op dst args
243+
genCall (PrimTarget op@(MO_Pdep w)) [dst] args =
244+
genCallSimpleCast w op dst args
245+
genCall (PrimTarget op@(MO_Pext w)) [dst] args =
246+
genCallSimpleCast w op dst args
247+
genCall (PrimTarget op@(MO_PopCnt w)) [dst] args =
248+
genCallSimpleCast w op dst args
250249

251250
genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $ do
252251
addrVar <- exprToVarW addr
@@ -640,63 +639,28 @@ genCallExtract _ _ _ _ =
640639
-- since GHC only really has i32 and i64 types and things like Word8 are backed
641640
-- by an i32 and just present a logical i8 range. So we must handle conversions
642641
-- from i32 to i8 explicitly as LLVM is strict about types.
643-
genCallSimpleCast :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
644-
-> LlvmM StmtData
645-
genCallSimpleCast w t@(PrimTarget op) [dst] args = do
646-
let width = widthToLlvmInt w
647-
dstTy = cmmToLlvmType $ localRegType dst
648-
649-
fname <- cmmPrimOpFunctions op
650-
(fptr, _, top3) <- getInstrinct fname width [width]
651-
652-
(dstV, _dst_ty) <- getCmmReg (CmmLocal dst)
653-
654-
let (_, arg_hints) = foreignTargetHints t
655-
let args_hints = zip args arg_hints
656-
(argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
657-
(argsV', stmts4) <- castVars Signed $ zip argsV [width]
658-
(retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
659-
(retVs', stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
660-
let retV' = singletonPanic "genCallSimpleCast" retVs'
661-
let s2 = Store retV' dstV Nothing []
662-
663-
let stmts = stmts2 `appOL` stmts4 `snocOL`
664-
s1 `appOL` stmts5 `snocOL` s2
665-
return (stmts, top2 ++ top3)
666-
genCallSimpleCast _ _ dsts _ =
667-
panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts")
668-
669-
-- Handle simple function call that only need simple type casting, of the form:
670-
-- truncate arg >>= \a -> call(a) >>= zext
671-
--
672-
-- since GHC only really has i32 and i64 types and things like Word8 are backed
673-
-- by an i32 and just present a logical i8 range. So we must handle conversions
674-
-- from i32 to i8 explicitly as LLVM is strict about types.
675-
genCallSimpleCast2 :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
676-
-> LlvmM StmtData
677-
genCallSimpleCast2 w t@(PrimTarget op) [dst] args = do
678-
let width = widthToLlvmInt w
679-
dstTy = cmmToLlvmType $ localRegType dst
680-
681-
fname <- cmmPrimOpFunctions op
682-
(fptr, _, top3) <- getInstrinct fname width (const width <$> args)
683-
684-
(dstV, _dst_ty) <- getCmmReg (CmmLocal dst)
685-
686-
let (_, arg_hints) = foreignTargetHints t
687-
let args_hints = zip args arg_hints
688-
(argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
689-
(argsV', stmts4) <- castVars Signed $ zip argsV (const width <$> argsV)
690-
(retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
691-
(retVs', stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
692-
let retV' = singletonPanic "genCallSimpleCast2" retVs'
693-
let s2 = Store retV' dstV Nothing []
642+
genCallSimpleCast :: Width -> CallishMachOp -> CmmFormal -> [CmmActual]
643+
-> LlvmM StmtData
644+
genCallSimpleCast specW op dst args = do
645+
let width = widthToLlvmInt specW
646+
argsW = const width <$> args
647+
dstType = cmmToLlvmType $ localRegType dst
648+
signage = cmmPrimOpRetValSignage op
649+
650+
fname <- cmmPrimOpFunctions op
651+
(fptr, _, top3) <- getInstrinct fname width argsW
652+
(dstV, _dst_ty) <- getCmmReg (CmmLocal dst)
653+
let (_, arg_hints) = foreignTargetHints $ PrimTarget op
654+
let args_hints = zip args arg_hints
655+
(argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
656+
(argsV', stmts4) <- castVars signage $ zip argsV argsW
657+
(retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
658+
(retV', stmts5) <- castVar signage retV dstType
659+
let s2 = Store retV' dstV Nothing []
694660

695661
let stmts = stmts2 `appOL` stmts4 `snocOL`
696-
s1 `appOL` stmts5 `snocOL` s2
662+
s1 `snocOL` stmts5 `snocOL` s2
697663
return (stmts, top2 ++ top3)
698-
genCallSimpleCast2 _ _ dsts _ =
699-
panic ("genCallSimpleCast2: " ++ show (length dsts) ++ " dsts")
700664

701665
-- | Create a function pointer from a target.
702666
getFunPtrW :: (LMString -> LlvmType) -> ForeignTarget
@@ -811,11 +775,47 @@ castVar signage v t | getVarType v == t
811775
Signed -> LM_Sext
812776
Unsigned -> LM_Zext
813777

814-
815778
cmmPrimOpRetValSignage :: CallishMachOp -> Signage
816779
cmmPrimOpRetValSignage mop = case mop of
817-
MO_Pdep _ -> Unsigned
818-
MO_Pext _ -> Unsigned
780+
-- Some bit-wise operations /must/ always treat the input and output values
781+
-- as 'Unsigned' in order to return the expected result values when pre/post-
782+
-- operation bit-width truncation and/or extension occur. For example,
783+
-- consider the Bit-Reverse operation:
784+
--
785+
-- If the result of a Bit-Reverse is treated as signed,
786+
-- an positive input can result in an negative output, i.e.:
787+
--
788+
-- identity(0x03) = 0x03 = 00000011
789+
-- breverse(0x03) = 0xC0 = 11000000
790+
--
791+
-- Now if an extension is performed after the operation to
792+
-- promote a smaller bit-width value into a larger bit-width
793+
-- type, it is expected that the /bit-wise/ operations will
794+
-- not be treated /numerically/ as signed.
795+
--
796+
-- To illustrate the difference, consider how a signed extension
797+
-- for the type i16 to i32 differs for out values above:
798+
-- ext_zeroed(i32, breverse(0x03)) = 0x00C0 = 0000000011000000
799+
-- ext_signed(i32, breverse(0x03)) = 0xFFC0 = 1111111111000000
800+
--
801+
-- Here we can see that the former output is the expected result
802+
-- of a bit-wise operation which needs to be promoted to a larger
803+
-- bit-width type. The latter output is not desirable when we must
804+
-- constraining a value into a range of i16 within an i32 type.
805+
--
806+
-- Hence we always treat the "signage" as unsigned for Bit-Reverse!
807+
--
808+
-- The same reasoning applied to Bit-Reverse above applies to the other
809+
-- bit-wise operations; do not sign extend a possibly negated number!
810+
MO_BRev _ -> Unsigned
811+
MO_BSwap _ -> Unsigned
812+
MO_Clz _ -> Unsigned
813+
MO_Ctz _ -> Unsigned
814+
MO_Pdep _ -> Unsigned
815+
MO_Pext _ -> Unsigned
816+
MO_PopCnt _ -> Unsigned
817+
818+
-- All other cases, default to preserving the numeric sign when extending.
819819
_ -> Signed
820820

821821
-- | Decide what C function to use to implement a CallishMachOp

libraries/ghc-internal/cbits/pdep.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,23 @@ hs_pdep64(StgWord64 src, StgWord64 mask)
2424
return result;
2525
}
2626

27+
// When dealing with values of bit-width shorter than uint64_t, ensure to
28+
// cast the return value to correctly truncate the undefined upper bits.
29+
// This is *VERY* important when GHC is using the LLVM backend!
2730
StgWord
2831
hs_pdep32(StgWord src, StgWord mask)
2932
{
30-
return hs_pdep64(src, mask);
33+
return (StgWord) ((StgWord32) hs_pdep64(src, mask));
3134
}
3235

3336
StgWord
3437
hs_pdep16(StgWord src, StgWord mask)
3538
{
36-
return hs_pdep64(src, mask);
39+
return (StgWord) ((StgWord16) hs_pdep64(src, mask));
3740
}
3841

3942
StgWord
4043
hs_pdep8(StgWord src, StgWord mask)
4144
{
42-
return hs_pdep64(src, mask);
45+
return (StgWord) ((StgWord8) hs_pdep64(src, mask));
4346
}

libraries/ghc-internal/cbits/pext.c

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
#include "Rts.h"
22
#include "MachDeps.h"
33

4-
StgWord64
5-
hs_pext64(StgWord64 src, StgWord64 mask)
4+
static StgWord64
5+
hs_pext(const unsigned char bit_width, const StgWord64 src, const StgWord64 mask)
66
{
77
uint64_t result = 0;
88
int offset = 0;
99

10-
for (int bit = 0; bit != sizeof(uint64_t) * 8; ++bit) {
10+
for (int bit = 0; bit != bit_width; ++bit) {
1111
const uint64_t src_bit = (src >> bit) & 1;
1212
const uint64_t mask_bit = (mask >> bit) & 1;
1313

@@ -20,20 +20,29 @@ hs_pext64(StgWord64 src, StgWord64 mask)
2020
return result;
2121
}
2222

23+
StgWord64
24+
hs_pext64(const StgWord64 src, const StgWord64 mask)
25+
{
26+
return hs_pext(64, src, mask);
27+
}
28+
29+
// When dealing with values of bit-width shorter than uint64_t, ensure to
30+
// cast the return value to correctly truncate the undefined upper bits.
31+
// This is *VERY* important when GHC is using the LLVM backend!
2332
StgWord
24-
hs_pext32(StgWord src, StgWord mask)
33+
hs_pext32(const StgWord src, const StgWord mask)
2534
{
26-
return hs_pext64(src, mask);
35+
return (StgWord) ((StgWord32) hs_pext(32, src, mask));
2736
}
2837

2938
StgWord
30-
hs_pext16(StgWord src, StgWord mask)
39+
hs_pext16(const StgWord src, const StgWord mask)
3140
{
32-
return hs_pext64(src, mask);
41+
return (StgWord) ((StgWord16) hs_pext(16, src, mask));
3342
}
3443

3544
StgWord
36-
hs_pext8(StgWord src, StgWord mask)
45+
hs_pext8(const StgWord src, const StgWord mask)
3746
{
38-
return hs_pext64(src, mask);
47+
return (StgWord) ((StgWord8) hs_pext(8, src, mask));
3948
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
-- Minimal reproducer for https://gitlab.haskell.org/ghc/ghc/-/issues/20645
2+
{-# LANGUAGE MagicHash #-}
3+
{-# LANGUAGE ExtendedLiterals #-}
4+
import GHC.Exts
5+
import GHC.Word
6+
import Numeric (showHex)
7+
8+
opaqueInt8# :: Int8# -> Int8#
9+
opaqueInt8# x = x
10+
{-# OPAQUE opaqueInt8# #-}
11+
12+
main :: IO ()
13+
main = let !x = opaqueInt8# 109#Int8
14+
!y = opaqueInt8# 1#Int8
15+
in putStrLn $ flip showHex "" (W# ( pext8#
16+
(word8ToWord# (int8ToWord8# (0#Int8 `subInt8#` x )))
17+
(word8ToWord# (int8ToWord8# (y `subInt8#` 4#Int8)))
18+
))
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
49

testsuite/tests/llvm/should_run/all.T

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ test('T22487', [normal, normalise_errmsg_fun(ignore_llvm_and_vortex)], compile_a
1717
test('T22033', [normal, normalise_errmsg_fun(ignore_llvm_and_vortex)], compile_and_run, [''])
1818
test('T25730', [req_c, unless(arch('x86_64'), skip), normalise_errmsg_fun(ignore_llvm_and_vortex)], compile_and_run, ['T25730C.c'])
1919
# T25730C.c contains Intel instrinsics, so only run this test on x86
20+
test('T20645', [normal, normalise_errmsg_fun(ignore_llvm_and_vortex), when(have_llvm(), extra_ways(["optllvm"]))], compile_and_run, [''])

testsuite/tests/numeric/should_run/foundation.hs

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ module Main
2424
( main
2525
) where
2626

27+
import Data.Bits (Bits((.&.), bit))
2728
import Data.Word
2829
import Data.Int
2930
import GHC.Natural
@@ -408,6 +409,33 @@ instance TestPrimop (Word# -> Int# -> Word#) where
408409
testPrimop s l r = Property s $ \(uWord -> a1) (uInt -> a2) -> (wWord (l a1 a2)) === wWord (r a1 a2)
409410
-}
410411

412+
-- | A special data-type for representing functions where,
413+
-- since only some number of the lower bits are defined,
414+
-- testing for strict equality in the undefined upper bits is not appropriate!
415+
-- Without using this data-type, false-positive failures will be reported
416+
-- when the undefined bit regions do not match, even though the equality of bits
417+
-- in this undefined region has no bearing on correctness.
418+
data LowerBitsAreDefined =
419+
LowerBitsAreDefined
420+
{ definedLowerWidth :: Word
421+
-- ^ The (strictly-non-negative) number of least-significant bits
422+
-- for which the attached function is defined.
423+
, undefinedBehavior :: (Word# -> Word#)
424+
-- ^ Function with undefined behavior for some of its most significant bits.
425+
}
426+
427+
instance TestPrimop LowerBitsAreDefined where
428+
testPrimop s l r = Property s $ \ (uWord#-> x0) ->
429+
let -- Create a mask to unset all bits in the undefined area,
430+
-- leaving set bits only in the area of defined behavior.
431+
-- Since the upper bits are undefined,
432+
-- if the function defines behavior for the lower N bits,
433+
-- then /only/ the lower N bits are preserved,
434+
-- and the upper WORDSIZE - N bits are discarded.
435+
mask = bit (fromEnum (definedLowerWidth r)) - 1
436+
valL = wWord# (undefinedBehavior l x0) .&. mask
437+
valR = wWord# (undefinedBehavior r x0) .&. mask
438+
in valL === valR
411439

412440
twoNonZero :: (a -> a -> b) -> a -> NonZero a -> b
413441
twoNonZero f x (NonZero y) = f x y
@@ -655,13 +683,13 @@ testPrimops = Group "primop"
655683
, testPrimop "ctz32#" Primop.ctz32# Wrapper.ctz32#
656684
, testPrimop "ctz64#" Primop.ctz64# Wrapper.ctz64#
657685
, testPrimop "ctz#" Primop.ctz# Wrapper.ctz#
658-
, testPrimop "byteSwap16#" Primop.byteSwap16# Wrapper.byteSwap16#
659-
, testPrimop "byteSwap32#" Primop.byteSwap32# Wrapper.byteSwap32#
686+
, testPrimop "byteSwap16#" (16 `LowerBitsAreDefined` Primop.byteSwap16#) (16 `LowerBitsAreDefined` Wrapper.byteSwap16#)
687+
, testPrimop "byteSwap32#" (32 `LowerBitsAreDefined` Primop.byteSwap32#) (32 `LowerBitsAreDefined` Wrapper.byteSwap32#)
660688
, testPrimop "byteSwap64#" Primop.byteSwap64# Wrapper.byteSwap64#
661689
, testPrimop "byteSwap#" Primop.byteSwap# Wrapper.byteSwap#
662-
, testPrimop "bitReverse8#" Primop.bitReverse8# Wrapper.bitReverse8#
663-
, testPrimop "bitReverse16#" Primop.bitReverse16# Wrapper.bitReverse16#
664-
, testPrimop "bitReverse32#" Primop.bitReverse32# Wrapper.bitReverse32#
690+
, testPrimop "bitReverse8#" (8 `LowerBitsAreDefined` Primop.bitReverse8#) (8 `LowerBitsAreDefined` Wrapper.bitReverse8#)
691+
, testPrimop "bitReverse16#" (16 `LowerBitsAreDefined` Primop.bitReverse16#) (16 `LowerBitsAreDefined` Wrapper.bitReverse16#)
692+
, testPrimop "bitReverse32#" (32 `LowerBitsAreDefined` Primop.bitReverse32#) (32 `LowerBitsAreDefined` Wrapper.bitReverse32#)
665693
, testPrimop "bitReverse64#" Primop.bitReverse64# Wrapper.bitReverse64#
666694
, testPrimop "bitReverse#" Primop.bitReverse# Wrapper.bitReverse#
667695
, testPrimop "narrow8Int#" Primop.narrow8Int# Wrapper.narrow8Int#

utils/genprimopcode/Lexer.x

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ words :-
5656
<0> "CanFail" { mkT TCanFail }
5757
<0> "ThrowsException" { mkT TThrowsException }
5858
<0> "ReadWriteEffect" { mkT TReadWriteEffect }
59+
<0> "defined_bits" { mkT TDefinedBits }
5960
<0> "can_fail_warning" { mkT TCanFailWarnFlag }
6061
<0> "DoNotWarnCanFail" { mkT TDoNotWarnCanFail }
6162
<0> "WarnIfEffectIsCanFail" { mkT TWarnIfEffectIsCanFail }

0 commit comments

Comments
 (0)