@@ -1862,6 +1862,194 @@ struct OptimizeInstructions
     }
   }
 
+  void visitStructRMW(StructRMW* curr) {
+    skipNonNullCast(curr->ref, curr);
+    if (trapOnNull(curr, curr->ref)) {
+      return;
+    }
+
+    if (!curr->ref->type.isStruct()) {
+      return;
+    }
+
+    Builder builder(*getModule());
+
+    // Even when the RMW access is to shared memory, we can optimize out the
+    // modify and write parts if we know that the modified value is the same as
+    // the original value. This is valid because reads from writes that don't
+    // change the in-memory value can be considered to be reads from the
+    // previous write to the same location instead. That means there is no read
+    // that necessarily synchronizes with the write.
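+    // For example (an illustrative sketch in WAT-like syntax, not necessarily
+    // the exact text format), an RMW add of a constant zero keeps only its
+    // read, so something like
+    //   (struct.atomic.rmw.add $i (ref) (i32.const 0))
+    // can become a plain atomic get of the same field with the same order,
+    // with the value operand dropped to preserve its side effects.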
+    auto* value =
+      Properties::getFallthrough(curr->value, getPassOptions(), *getModule());
+    if (Properties::isSingleConstantExpression(value)) {
+      auto val = Properties::getLiteral(value);
+      bool canOptimize = false;
+      switch (curr->op) {
+        case RMWAdd:
+        case RMWSub:
+        case RMWOr:
+        case RMWXor:
+          canOptimize = val.getInteger() == 0;
+          break;
+        case RMWAnd:
+          canOptimize = val == Literal::makeNegOne(val.type);
+          break;
+        case RMWXchg:
+          canOptimize = false;
+          break;
+      }
+      if (canOptimize) {
+        replaceCurrent(builder.makeStructGet(
+          curr->index,
+          getResultOfFirst(curr->ref, builder.makeDrop(curr->value)),
+          curr->order,
+          curr->type));
+        return;
+      }
+    }
+
+    if (curr->ref->type.getHeapType().isShared()) {
+      return;
+    }
+
+    // Lower the RMW to its more basic operations. Breaking the atomic
+    // operation into several non-atomic operations is safe because no other
+    // thread can observe an intermediate state in the unshared memory. This
+    // initially increases code size, but the more basic operations may be
+    // more optimizable than the original RMW.
+    // TODO: Experiment to determine whether this is worthwhile on real code.
+    // Maybe we should do this optimization only when optimizing for speed over
+    // size.
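+    // Roughly, the lowered form looks like this pseudocode (local names are
+    // illustrative):
+    //   r := ref; v := value; old := r.field
+    //   r.field := old <op> v   (just v for xchg)
+    //   [seqcst fence if the original order was seqcst]
+    //   result is old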
+    auto ref = builder.addVar(getFunction(), curr->ref->type);
+    auto val = builder.addVar(getFunction(), curr->type);
+    auto result = builder.addVar(getFunction(), curr->type);
+    auto* block = builder.makeBlock(
+      {builder.makeLocalSet(ref, curr->ref),
+       builder.makeLocalSet(val, curr->value),
+       builder.makeLocalSet(
+         result,
+         builder.makeStructGet(curr->index,
+                               builder.makeLocalGet(ref, curr->ref->type),
+                               MemoryOrder::Unordered,
+                               curr->type))});
+    Expression* newVal = nullptr;
+    if (curr->op == RMWXchg) {
+      newVal = builder.makeLocalGet(val, curr->type);
+    } else {
+      Abstract::Op binop = Abstract::Add;
+      switch (curr->op) {
+        case RMWAdd:
+          binop = Abstract::Add;
+          break;
+        case RMWSub:
+          binop = Abstract::Sub;
+          break;
+        case RMWAnd:
+          binop = Abstract::And;
+          break;
+        case RMWOr:
+          binop = Abstract::Or;
+          break;
+        case RMWXor:
+          binop = Abstract::Xor;
+          break;
+        case RMWXchg:
+          WASM_UNREACHABLE("unexpected op");
+      }
+      newVal = builder.makeBinary(Abstract::getBinary(curr->type, binop),
+                                  builder.makeLocalGet(result, curr->type),
+                                  builder.makeLocalGet(val, curr->type));
+    }
+    block->list.push_back(
+      builder.makeStructSet(curr->index,
+                            builder.makeLocalGet(ref, curr->ref->type),
+                            newVal,
+                            MemoryOrder::Unordered));
+
+    // We must maintain this operation's effect on the global order of seqcst
+    // operations.
+    if (curr->order == MemoryOrder::SeqCst) {
+      block->list.push_back(builder.makeAtomicFence());
+    }
+
+    block->list.push_back(builder.makeLocalGet(result, curr->type));
+    block->type = curr->type;
+    replaceCurrent(block);
+  }
+
+  void visitStructCmpxchg(StructCmpxchg* curr) {
+    skipNonNullCast(curr->ref, curr);
+    if (trapOnNull(curr, curr->ref)) {
+      return;
+    }
+
+    if (!curr->ref->type.isStruct()) {
+      return;
+    }
+
+    Builder builder(*getModule());
+
+    // Just like other RMW operations, cmpxchg can be optimized to just a read
+    // if it is known not to change the in-memory value. This is the case when
+    // `expected` and `replacement` are known to be the same.
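+    // Illustrative sketch: cmpxchg(ref, e, e) either leaves the field
+    // unchanged or writes back the value it already holds, so only the atomic
+    // read remains (with the operands kept as drops for their side effects).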
+    if (areConsecutiveInputsEqual(curr->expected, curr->replacement)) {
+      auto* ref = getResultOfFirst(
+        curr->ref,
+        builder.makeSequence(builder.makeDrop(curr->expected),
+                             builder.makeDrop(curr->replacement)));
+      replaceCurrent(
+        builder.makeStructGet(curr->index, ref, curr->order, curr->type));
+      return;
+    }
+
+    if (curr->ref->type.getHeapType().isShared()) {
+      return;
+    }
+
+    // Just like other RMW operations, lower to basic operations when operating
+    // on unshared memory.
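+    // Roughly, the lowered form looks like this pseudocode (local names are
+    // illustrative):
+    //   r := ref; e := expected; p := replacement; old := r.field
+    //   if (old == e) r.field := p
+    //   [seqcst fence if the original order was seqcst]
+    //   result is old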
+    auto ref = builder.addVar(getFunction(), curr->ref->type);
+    auto expected = builder.addVar(getFunction(), curr->type);
+    auto replacement = builder.addVar(getFunction(), curr->type);
+    auto result = builder.addVar(getFunction(), curr->type);
+    auto* block =
+      builder.makeBlock({builder.makeLocalSet(ref, curr->ref),
+                         builder.makeLocalSet(expected, curr->expected),
+                         builder.makeLocalSet(replacement, curr->replacement)});
+    auto* lhs = builder.makeLocalTee(
+      result,
+      builder.makeStructGet(curr->index,
+                            builder.makeLocalGet(ref, curr->ref->type),
+                            MemoryOrder::Unordered,
+                            curr->type),
+      curr->type);
+    auto* rhs = builder.makeLocalGet(expected, curr->type);
+    Expression* pred = nullptr;
+    if (curr->type.isRef()) {
+      pred = builder.makeRefEq(lhs, rhs);
+    } else {
+      pred = builder.makeBinary(
+        Abstract::getBinary(curr->type, Abstract::Eq), lhs, rhs);
+    }
+    block->list.push_back(builder.makeIf(
+      pred,
+      builder.makeStructSet(curr->index,
+                            builder.makeLocalGet(ref, curr->ref->type),
+                            builder.makeLocalGet(replacement, curr->type),
+                            MemoryOrder::Unordered)));
+
+    // We must maintain this operation's effect on the global order of seqcst
+    // operations.
+    if (curr->order == MemoryOrder::SeqCst) {
+      block->list.push_back(builder.makeAtomicFence());
+    }
+
+    block->list.push_back(builder.makeLocalGet(result, curr->type));
+    block->type = curr->type;
+    replaceCurrent(block);
+  }
+
   void visitArrayNew(ArrayNew* curr) {
     // If a value is provided, we can optimize in some cases.
     if (curr->type == Type::unreachable || curr->isWithDefault()) {