Skip to content

Commit c8c0ae3

Browse files
committed
Better control pointer alignments.
It seems that either llvm-hs or LLVM itself assumes that a pointer load or store that doesn't indicate an explicit alignment is aligned to the size of the pointee. But this is wrong for vector loads from multidimensional Dex arrays: the minor dimension need only be aligned on the size of a single array element, not the whole vector.
1 parent c489415 commit c8c0ae3

File tree

2 files changed

+39
-3
lines changed

2 files changed

+39
-3
lines changed

src/lib/ImpToLLVM.hs

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -902,16 +902,33 @@ withWidthOfFP x template = case typeOf template of
902902
L.FloatingPointType L.FloatFP -> litVal $ Float32Lit $ realToFrac x
903903
_ -> error $ "Unsupported floating point type: " ++ show (typeOf template)
904904

905+
-- | If we are accessing a `L.Type` from a Dex array, what memory alignment (in
-- bytes) can we guarantee?  This is probably better expressed in Dex types,
-- but we would need to plumb them to do it that way.  1-byte alignment should
-- always be safe, but we can promise higher-performance alignments for some
-- types.
--
-- Every result is a non-zero power of two: LLVM only accepts power-of-two
-- `align` values on loads and stores, and an alignment of 0 means "use the
-- target's ABI alignment", which is exactly the assumption this function
-- exists to avoid.
dexAlignment :: L.Type -> Word32
dexAlignment = \case
  L.IntegerType bits
    -- Whole-byte integers get their natural alignment, but only when the byte
    -- count is a power of two (i8, i16, i32, i64, ...).  Without this check an
    -- i24 would yield the invalid alignment 3 and a zero-width integer would
    -- yield 0.  When the guard fails we fall through to the 1-byte case below.
    | (bytes, 0) <- bits `divMod` 8
    , bytes `elem` takeWhile (<= bytes) (iterate (* 2) 1) -> bytes
  -- Sub-byte, odd-sized, or zero-width integers: promise nothing beyond 1 byte.
  L.IntegerType _ -> 1
  -- Only 4-byte alignment is promised for pointers, regardless of their size.
  L.PointerType _ _ -> 4
  L.FloatingPointType L.FloatFP -> 4
  L.FloatingPointType L.DoubleFP -> 8
  -- A vector inside a Dex array is only as aligned as one of its elements, not
  -- as the whole vector.
  L.VectorType _ eltTy -> dexAlignment eltTy
  -- Anything else: fall back to the always-safe 1-byte alignment.
  _ -> 1
919+
905920
store :: LLVMBuilder m => Operand -> Operand -> m ()
906-
store ptr x = addInstr $ L.Do $ L.Store False ptr x Nothing 0 []
921+
store ptr x = addInstr $ L.Do $ L.Store False ptr x Nothing alignment [] where
922+
alignment = dexAlignment $ typeOf x
907923

908924
load :: LLVMBuilder m => L.Type -> Operand -> m Operand
909925
load pointeeTy ptr =
910926
#if MIN_VERSION_llvm_hs(15,0,0)
911-
emitInstr pointeeTy $ L.Load False pointeeTy ptr Nothing 0 []
927+
emitInstr pointeeTy $ L.Load False pointeeTy ptr Nothing alignment []
912928
#else
913-
emitInstr pointeeTy $ L.Load False ptr Nothing 0 []
929+
emitInstr pointeeTy $ L.Load False ptr Nothing alignment []
914930
#endif
931+
where alignment = dexAlignment pointeeTy
915932

916933
ilt :: LLVMBuilder m => Operand -> Operand -> m Operand
917934
ilt x y = emitInstr i1 $ L.ICmp IP.SLT x y []

tests/opt-tests.dx

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,3 +227,22 @@ _ = yield_accum (AddMonoid Int32) \result.
227227
-- CHECK: [[xsix:v#[0-9]+]]:<16xInt32> =
228228
-- CHECK-NEXT: vslice
229229
-- CHECK: extend [[refix]] [[xsix]]
230+
231+
"Non-aligned"
232+
-- CHECK-LABEL: Non-aligned
233+
234+
-- This is a regression test. We are checking that Dex-side
235+
-- vectorization does not end up assuming that arrays are aligned on
236+
-- the size of the vectors, only on the size of the underlying
237+
-- scalars.
238+
239+
non_aligned = for i:(Fin 7). for j:(Fin 257). +0
240+
241+
%passes llvm
242+
_ = yield_accum (AddMonoid Int32) \result.
243+
tile((Fin 257), 32) \set.
244+
for_ i:set.
245+
ix = inject(i, to=(Fin 257))
246+
result!(6@(Fin 7))!ix += non_aligned[6@_][ix]
247+
-- CHECK: load <16 x i32>, <16 x i32>* %"v#{{[0-9]+}}", align 4
248+
-- CHECK: store <16 x i32> %"v#{{[0-9]+}}", <16 x i32>* %"v#{{[0-9]+}}", align 4

0 commit comments

Comments
 (0)