Skip to content

Commit d444abd

Browse files
authored
Handle atomic accesses in Heap2Local (#7158)
Heap2Local replaces gets and sets of non-escaping heap allocations with gets and sets of locals. Since the accessed data does not escape, it cannot be used to directly synchronize with other threads, so this optimization is generally safe even in the presence of shared structs and atomic struct accesses. The only caveat is that sequentially consistent accesses additionally participate in the global ordering of sequentially consistent operations, and that effect on the global ordering cannot be removed. Insert seqcst fences to maintain this global synchronization when removing sequentially consistent gets and sets.
1 parent 7c8cd2f commit d444abd

File tree

2 files changed

+122
-3
lines changed

2 files changed

+122
-3
lines changed

src/passes/Heap2Local.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -839,9 +839,19 @@ struct Struct2Local : PostWalker<Struct2Local> {
839839

840840
// Drop the ref (leaving it to other opts to remove, when possible), and
841841
// write the data to the local instead of the heap allocation.
842-
replaceCurrent(builder.makeSequence(
842+
auto* replacement = builder.makeSequence(
843843
builder.makeDrop(curr->ref),
844-
builder.makeLocalSet(localIndexes[curr->index], curr->value)));
844+
builder.makeLocalSet(localIndexes[curr->index], curr->value));
845+
846+
// This struct.set cannot possibly synchronize with other threads via the
847+
// read value, since the struct never escapes this function. But if the set
848+
// is sequentially consistent, it still participates in the global order of
849+
// sequentially consistent operations. Preserve this effect on the global
850+
// ordering by inserting a fence.
851+
if (curr->order == MemoryOrder::SeqCst) {
852+
replacement = builder.blockify(replacement, builder.makeAtomicFence());
853+
}
854+
replaceCurrent(replacement);
845855
}
846856

847857
void visitStructGet(StructGet* curr) {
@@ -873,7 +883,13 @@ struct Struct2Local : PostWalker<Struct2Local> {
873883
// general. However, signed gets make that more complicated, so leave this
874884
// for other opts to handle.
875885
value = Bits::makePackedFieldGet(value, field, curr->signed_, wasm);
876-
replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref), value));
886+
auto* replacement = builder.blockify(builder.makeDrop(curr->ref));
887+
// See the note on seqcst struct.set. It is ok to insert the fence before
888+
// the value here since we know the value is just a local.get.
889+
if (curr->order == MemoryOrder::SeqCst) {
890+
replacement = builder.blockify(replacement, builder.makeAtomicFence());
891+
}
892+
replaceCurrent(builder.blockify(replacement, value));
877893
}
878894
};
879895

test/lit/passes/heap2local.wast

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4608,3 +4608,106 @@
46084608
)
46094609
)
46104610
)
4611+
4612+
;; Atomic accesses need special handling
4613+
(module
4614+
;; CHECK: (type $0 (func))
4615+
4616+
;; CHECK: (type $struct (shared (struct (field (mut i32)))))
4617+
(type $struct (shared (struct (field (mut i32)))))
4618+
4619+
;; CHECK: (func $acqrel (type $0)
4620+
;; CHECK-NEXT: (local $0 (ref null $struct))
4621+
;; CHECK-NEXT: (local $1 i32)
4622+
;; CHECK-NEXT: (drop
4623+
;; CHECK-NEXT: (block (result (ref null (shared none)))
4624+
;; CHECK-NEXT: (local.set $1
4625+
;; CHECK-NEXT: (i32.const 0)
4626+
;; CHECK-NEXT: )
4627+
;; CHECK-NEXT: (ref.null (shared none))
4628+
;; CHECK-NEXT: )
4629+
;; CHECK-NEXT: )
4630+
;; CHECK-NEXT: (drop
4631+
;; CHECK-NEXT: (block (result i32)
4632+
;; CHECK-NEXT: (drop
4633+
;; CHECK-NEXT: (ref.null (shared none))
4634+
;; CHECK-NEXT: )
4635+
;; CHECK-NEXT: (local.get $1)
4636+
;; CHECK-NEXT: )
4637+
;; CHECK-NEXT: )
4638+
;; CHECK-NEXT: (block
4639+
;; CHECK-NEXT: (drop
4640+
;; CHECK-NEXT: (ref.null (shared none))
4641+
;; CHECK-NEXT: )
4642+
;; CHECK-NEXT: (local.set $1
4643+
;; CHECK-NEXT: (i32.const 0)
4644+
;; CHECK-NEXT: )
4645+
;; CHECK-NEXT: )
4646+
;; CHECK-NEXT: )
4647+
(func $acqrel
4648+
(local (ref null $struct))
4649+
(local.set 0
4650+
(struct.new_default $struct)
4651+
)
4652+
;; acqrel accesses to non-escaping structs cannot synchronize with other
4653+
;; threads, so we can optimize normally.
4654+
(drop
4655+
(struct.atomic.get acqrel $struct 0
4656+
(local.get 0)
4657+
)
4658+
)
4659+
(struct.atomic.set acqrel $struct 0
4660+
(local.get 0)
4661+
(i32.const 0)
4662+
)
4663+
)
4664+
4665+
;; CHECK: (func $seqcst (type $0)
4666+
;; CHECK-NEXT: (local $0 (ref null $struct))
4667+
;; CHECK-NEXT: (local $1 i32)
4668+
;; CHECK-NEXT: (drop
4669+
;; CHECK-NEXT: (block (result (ref null (shared none)))
4670+
;; CHECK-NEXT: (local.set $1
4671+
;; CHECK-NEXT: (i32.const 0)
4672+
;; CHECK-NEXT: )
4673+
;; CHECK-NEXT: (ref.null (shared none))
4674+
;; CHECK-NEXT: )
4675+
;; CHECK-NEXT: )
4676+
;; CHECK-NEXT: (drop
4677+
;; CHECK-NEXT: (block (result i32)
4678+
;; CHECK-NEXT: (drop
4679+
;; CHECK-NEXT: (ref.null (shared none))
4680+
;; CHECK-NEXT: )
4681+
;; CHECK-NEXT: (atomic.fence)
4682+
;; CHECK-NEXT: (local.get $1)
4683+
;; CHECK-NEXT: )
4684+
;; CHECK-NEXT: )
4685+
;; CHECK-NEXT: (block
4686+
;; CHECK-NEXT: (drop
4687+
;; CHECK-NEXT: (ref.null (shared none))
4688+
;; CHECK-NEXT: )
4689+
;; CHECK-NEXT: (local.set $1
4690+
;; CHECK-NEXT: (i32.const 0)
4691+
;; CHECK-NEXT: )
4692+
;; CHECK-NEXT: (atomic.fence)
4693+
;; CHECK-NEXT: )
4694+
;; CHECK-NEXT: )
4695+
(func $seqcst
4696+
(local (ref null $struct))
4697+
(local.set 0
4698+
(struct.new_default $struct)
4699+
)
4700+
;; seqcst accesses participate in the global ordering of seqcst operations,
4701+
;; so they need to be replaced with a seqcst fence to maintain that
4702+
;; ordering.
4703+
(drop
4704+
(struct.atomic.get $struct 0
4705+
(local.get 0)
4706+
)
4707+
)
4708+
(struct.atomic.set $struct 0
4709+
(local.get 0)
4710+
(i32.const 0)
4711+
)
4712+
)
4713+
)

0 commit comments

Comments
 (0)