File tree Expand file tree Collapse file tree 3 files changed +85
-14
lines changed
Expand file tree Collapse file tree 3 files changed +85
-14
lines changed Original file line number Diff line number Diff line change @@ -486,6 +486,30 @@ inline bool canEmitSelectWithArms(Expression* ifTrue, Expression* ifFalse) {
486486 return ifTrue->type .isSingle () && ifFalse->type .isSingle ();
487487}
488488
489+ // If this instruction accesses memory or the heap, or otherwise participates in
490+ // shared memory synchronization, return the memory order corresponding to the
491+ // kind of synchronization it does. Return MemoryOrder::Unordered if there is no
492+ // synchronization. Does not look at children.
493+ inline MemoryOrder getMemoryOrder (Expression* curr) {
494+ if (auto * get = curr->dynCast <StructGet>()) {
495+ return get->order ;
496+ }
497+ if (auto * set = curr->dynCast <StructSet>()) {
498+ return set->order ;
499+ }
500+ if (auto * load = curr->dynCast <Load>()) {
501+ return load->isAtomic ? MemoryOrder::SeqCst : MemoryOrder::Unordered;
502+ }
503+ if (auto * store = curr->dynCast <Store>()) {
504+ return store->isAtomic ? MemoryOrder::SeqCst : MemoryOrder::Unordered;
505+ }
506+ if (curr->is <AtomicRMW>() || curr->is <AtomicWait>() ||
507+ curr->is <AtomicNotify>() || curr->is <AtomicFence>()) {
508+ return MemoryOrder::SeqCst;
509+ }
510+ return MemoryOrder::Unordered;
511+ }
512+
489513// A "generative" expression is one that can generate different results for the
490514// same inputs, and that difference is *not* explained by other expressions that
491515// interact with this one. This is an intrinsic/internal property of the
Original file line number Diff line number Diff line change @@ -150,20 +150,14 @@ struct GUFAOptimizer
150150 return ;
151151 }
152152
153- if (contents.isNull () && curr->type .isNullable ()) {
154- // Null values are all identical, so just fix up the type here if we need
155- // to (the null's type might not fit in this expression, if it passed
156- // through casts).
157- if (!Type::isSubType (contents.getType (), curr->type )) {
158- contents = PossibleContents::literal (
159- Literal::makeNull (curr->type .getHeapType ()));
160- }
161-
162- // Note that if curr's type is *not* nullable, then the code will trap at
163- // runtime (the null must arrive through a cast that will trap). We handle
164- // that below, so we don't need to think about it here.
165-
166- // TODO: would emitting a more specific null be useful when valid?
153+ if (Properties::getMemoryOrder (curr) != MemoryOrder::Unordered) {
154+ // This load might synchronize with some store, and if we replaced the
155+ // load with a constant or with a load from a global, it would not
156+ // synchronize with that store anymore. Since we know what value the store
157+ // must write, and we know it is the same as every other store to the same
158+ // location, it's possible that optimizing here would be allowable, but
159+ // for now be conservative and do not optimize.
160+ return ;
167161 }
168162
169163 auto * c = contents.makeExpression (wasm);
Original file line number Diff line number Diff line change 61376137 )
61386138 )
61396139)
6140+
6141+ ;; Atomic accesses require special handling
6142+ (module
6143+ ;; CHECK: (type $A (shared (struct (field i32))))
6144+ (type $A (shared (struct (field i32 ))))
6145+
6146+ ;; CHECK: (type $1 (func))
6147+
6148+ ;; CHECK: (func $gets (type $1)
6149+ ;; CHECK-NEXT: (local $0 (ref $A))
6150+ ;; CHECK-NEXT: (local.set $0
6151+ ;; CHECK-NEXT: (struct.new_default $A)
6152+ ;; CHECK-NEXT: )
6153+ ;; CHECK-NEXT: (drop
6154+ ;; CHECK-NEXT: (i32.const 0)
6155+ ;; CHECK-NEXT: )
6156+ ;; CHECK-NEXT: (drop
6157+ ;; CHECK-NEXT: (struct.atomic.get acqrel $A 0
6158+ ;; CHECK-NEXT: (local.get $0)
6159+ ;; CHECK-NEXT: )
6160+ ;; CHECK-NEXT: )
6161+ ;; CHECK-NEXT: (drop
6162+ ;; CHECK-NEXT: (struct.atomic.get $A 0
6163+ ;; CHECK-NEXT: (local.get $0)
6164+ ;; CHECK-NEXT: )
6165+ ;; CHECK-NEXT: )
6166+ ;; CHECK-NEXT: )
6167+ (func $gets
6168+ (local (ref $A ))
6169+ (local.set 0
6170+ (struct.new_default $A )
6171+ )
6172+ (drop
6173+ ;; This is optimizable. It reads from shared memory, but there is only one
6174+ ;; possible value that can be read.
6175+ (struct.get $A 0
6176+ (local.get 0 )
6177+ )
6178+ )
6179+ (drop
6180+ ;; We do not (yet) optimize atomic gets.
6181+ (struct.atomic.get acqrel $A 0
6182+ (local.get 0 )
6183+ )
6184+ )
6185+ (drop
6186+ ;; We do not (yet) optimize atomic gets.
6187+ (struct.atomic.get $A 0
6188+ (local.get 0 )
6189+ )
6190+ )
6191+ )
6192+ )
You can’t perform that action at this time.
0 commit comments