Commit 8623f73

Optimize struct RMW ops in OptimizeInstructions (#7225)
When the RMW operation can be proven not to change the accessed value, optimize it to a simple atomic get instead. This is valid because a write that does not change an in-memory value does not synchronize with any subsequent reads of that value: those reads can equally be considered to be reading from the previous write. Also optimize RMW operations on unshared structs to their non-atomic equivalents. This can increase code size, but it can also enable follow-on optimizations of the simpler operations and can be cheaper at runtime.
1 parent 42faa40 commit 8623f73
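To make the first optimization concrete: the modify-and-write parts can be dropped exactly when the RMW operand is an identity value for the operation, so the combined value provably equals the old value. A minimal standalone C++ sketch of those identities (illustrative only; these names are not Binaryen APIs):

#include <cassert>
#include <cstdint>

// Identity operands under which an RMW provably leaves memory unchanged,
// mirroring the switch in visitStructRMW in the diff below: 0 for
// add/sub/or/xor and all-ones for and. xchg always writes its operand,
// so it never qualifies.
int main() {
  uint32_t old = 0x12345678; // an arbitrary in-memory value
  assert(old + 0 == old);              // RMWAdd
  assert(old - 0 == old);              // RMWSub
  assert((old | 0) == old);            // RMWOr
  assert((old ^ 0) == old);            // RMWXor
  assert((old & ~uint32_t(0)) == old); // RMWAnd with -1 (all bits set)
}

In each of these cases the operation degenerates to an atomic get of the field, which is what the pass emits.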

File tree

3 files changed: +1459, -0 lines changed

scripts/test/fuzzing.py

Lines changed: 1 addition & 0 deletions
@@ -83,6 +83,7 @@
     'gc-atomics-null-refs.wast',
     'shared-structs.wast',
     'heap2local-rmw.wast',
+    'optimize-instructions-struct-rmw.wast',
     # contains too many segments to run in a wasm VM
     'limit-segments_disable-bulk-memory.wast',
     # https://github.com/WebAssembly/binaryen/issues/7176

src/passes/OptimizeInstructions.cpp

Lines changed: 188 additions & 0 deletions
@@ -1862,6 +1862,194 @@ struct OptimizeInstructions
     }
   }

+  void visitStructRMW(StructRMW* curr) {
+    skipNonNullCast(curr->ref, curr);
+    if (trapOnNull(curr, curr->ref)) {
+      return;
+    }
+
+    if (!curr->ref->type.isStruct()) {
+      return;
+    }
+
+    Builder builder(*getModule());
+
+    // Even when the RMW access is to shared memory, we can optimize out the
+    // modify and write parts if we know that the modified value is the same as
+    // the original value. This is valid because reads from writes that don't
+    // change the in-memory value can be considered to be reads from the
+    // previous write to the same location instead. That means there is no read
+    // that necessarily synchronizes with the write.
+    auto* value =
+      Properties::getFallthrough(curr->value, getPassOptions(), *getModule());
+    if (Properties::isSingleConstantExpression(value)) {
+      auto val = Properties::getLiteral(value);
+      bool canOptimize = false;
+      switch (curr->op) {
+        case RMWAdd:
+        case RMWSub:
+        case RMWOr:
+        case RMWXor:
+          canOptimize = val.getInteger() == 0;
+          break;
+        case RMWAnd:
+          canOptimize = val == Literal::makeNegOne(val.type);
+          break;
+        case RMWXchg:
+          canOptimize = false;
+          break;
+      }
+      if (canOptimize) {
+        replaceCurrent(builder.makeStructGet(
+          curr->index,
+          getResultOfFirst(curr->ref, builder.makeDrop(curr->value)),
+          curr->order,
+          curr->type));
+        return;
+      }
+    }
+
+    if (curr->ref->type.getHeapType().isShared()) {
+      return;
+    }
+
+    // Lower the RMW to its more basic operations. Breaking the atomic
+    // operation into several non-atomic operations is safe because no other
+    // thread can observe an intermediate state in the unshared memory. This
+    // initially increases code size, but the more basic operations may be
+    // more optimizable than the original RMW.
+    // TODO: Experiment to determine whether this is worthwhile on real code.
+    // Maybe we should do this optimization only when optimizing for speed over
+    // size.
+    auto ref = builder.addVar(getFunction(), curr->ref->type);
+    auto val = builder.addVar(getFunction(), curr->type);
+    auto result = builder.addVar(getFunction(), curr->type);
+    auto* block = builder.makeBlock(
+      {builder.makeLocalSet(ref, curr->ref),
+       builder.makeLocalSet(val, curr->value),
+       builder.makeLocalSet(
+         result,
+         builder.makeStructGet(curr->index,
+                               builder.makeLocalGet(ref, curr->ref->type),
+                               MemoryOrder::Unordered,
+                               curr->type))});
+    Expression* newVal = nullptr;
+    if (curr->op == RMWXchg) {
+      newVal = builder.makeLocalGet(val, curr->type);
+    } else {
+      Abstract::Op binop = Abstract::Add;
+      switch (curr->op) {
+        case RMWAdd:
+          binop = Abstract::Add;
+          break;
+        case RMWSub:
+          binop = Abstract::Sub;
+          break;
+        case RMWAnd:
+          binop = Abstract::And;
+          break;
+        case RMWOr:
+          binop = Abstract::Or;
+          break;
+        case RMWXor:
+          binop = Abstract::Xor;
+          break;
+        case RMWXchg:
+          WASM_UNREACHABLE("unexpected op");
+      }
+      newVal = builder.makeBinary(Abstract::getBinary(curr->type, binop),
+                                  builder.makeLocalGet(result, curr->type),
+                                  builder.makeLocalGet(val, curr->type));
+    }
+    block->list.push_back(
+      builder.makeStructSet(curr->index,
+                            builder.makeLocalGet(ref, curr->ref->type),
+                            newVal,
+                            MemoryOrder::Unordered));
+
+    // We must maintain this operation's effect on the global order of seqcst
+    // operations.
+    if (curr->order == MemoryOrder::SeqCst) {
+      block->list.push_back(builder.makeAtomicFence());
+    }
+
+    block->list.push_back(builder.makeLocalGet(result, curr->type));
+    block->type = curr->type;
+    replaceCurrent(block);
+  }
+
+  void visitStructCmpxchg(StructCmpxchg* curr) {
+    skipNonNullCast(curr->ref, curr);
+    if (trapOnNull(curr, curr->ref)) {
+      return;
+    }
+
+    if (!curr->ref->type.isStruct()) {
+      return;
+    }
+
+    Builder builder(*getModule());
+
+    // Just like other RMW operations, cmpxchg can be optimized to just a read
+    // if it is known not to change the in-memory value. This is the case when
+    // `expected` and `replacement` are known to be the same.
+    if (areConsecutiveInputsEqual(curr->expected, curr->replacement)) {
+      auto* ref = getResultOfFirst(
+        curr->ref,
+        builder.makeSequence(builder.makeDrop(curr->expected),
+                             builder.makeDrop(curr->replacement)));
+      replaceCurrent(
+        builder.makeStructGet(curr->index, ref, curr->order, curr->type));
+      return;
+    }
+
+    if (curr->ref->type.getHeapType().isShared()) {
+      return;
+    }
+
+    // Just like other RMW operations, lower to basic operations when operating
+    // on unshared memory.
+    auto ref = builder.addVar(getFunction(), curr->ref->type);
+    auto expected = builder.addVar(getFunction(), curr->type);
+    auto replacement = builder.addVar(getFunction(), curr->type);
+    auto result = builder.addVar(getFunction(), curr->type);
+    auto* block =
+      builder.makeBlock({builder.makeLocalSet(ref, curr->ref),
+                         builder.makeLocalSet(expected, curr->expected),
+                         builder.makeLocalSet(replacement, curr->replacement)});
+    auto* lhs = builder.makeLocalTee(
+      result,
+      builder.makeStructGet(curr->index,
+                            builder.makeLocalGet(ref, curr->ref->type),
+                            MemoryOrder::Unordered,
+                            curr->type),
+      curr->type);
+    auto* rhs = builder.makeLocalGet(expected, curr->type);
+    Expression* pred = nullptr;
+    if (curr->type.isRef()) {
+      pred = builder.makeRefEq(lhs, rhs);
+    } else {
+      pred = builder.makeBinary(
+        Abstract::getBinary(curr->type, Abstract::Eq), lhs, rhs);
+    }
+    block->list.push_back(builder.makeIf(
+      pred,
+      builder.makeStructSet(curr->index,
+                            builder.makeLocalGet(ref, curr->ref->type),
+                            builder.makeLocalGet(replacement, curr->type),
+                            MemoryOrder::Unordered)));
+
+    // We must maintain this operation's effect on the global order of seqcst
+    // operations.
+    if (curr->order == MemoryOrder::SeqCst) {
+      block->list.push_back(builder.makeAtomicFence());
+    }
+
+    block->list.push_back(builder.makeLocalGet(result, curr->type));
+    block->type = curr->type;
+    replaceCurrent(block);
+  }
+
   void visitArrayNew(ArrayNew* curr) {
     // If a value is provided, we can optimize in some cases.
     if (curr->type == Type::unreachable || curr->isWithDefault()) {
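For the unshared lowering in visitStructRMW above, the replacement block computes the same result as the original RMW: read the field, write back the combined value, and yield the old value. A hedged sketch of that shape in plain C++ (non-atomic, with illustrative names; the real pass builds Binaryen IR, not C++):

#include <cstdint>

// Sketch of the lowered non-atomic RMW for, e.g., RMWAdd on an i32 field.
int32_t loweredRmwAdd(int32_t* field, int32_t operand) {
  int32_t result = *field;   // struct.get (unordered)
  *field = result + operand; // struct.set (unordered) of the combined value
  // For a seqcst RMW the pass additionally appends an atomic fence here to
  // preserve the operation's effect on the global order of seqcst operations.
  return result;             // the RMW yields the pre-modification value
}

This is safe only because the struct is unshared: no other thread can observe the intermediate state between the get and the set.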

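visitStructCmpxchg lowers analogously: read the field, compare against `expected`, and store `replacement` only on a match. Again a hedged plain-C++ sketch with illustrative names:

#include <cstdint>

// Sketch of the lowered non-atomic cmpxchg on an i32 field. (When `expected`
// and `replacement` are provably equal, the earlier branch instead reduces
// the whole operation to a bare get, since the write cannot change memory.)
int32_t loweredCmpxchg(int32_t* field, int32_t expected, int32_t replacement) {
  int32_t result = *field;  // struct.get (unordered), tee'd into `result`
  if (result == expected) { // ref.eq for reference fields, integer eq otherwise
    *field = replacement;   // struct.set (unordered) only on a match
  }
  // As with the other RMW ops, a trailing atomic fence is appended for
  // seqcst orderings.
  return result;            // cmpxchg yields the old value
}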