|
| 1 | +/* |
| 2 | + * Copyright 2017 WebAssembly Community Group participants |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +// |
| 18 | +// Eliminate redundant set_locals: if a local already has a particular |
| 19 | +// value, we don't need to set it again. A common case here is loops |
| 20 | +// that start at zero, since the default value is initialized to |
| 21 | +// zero anyhow. |
| 22 | +// |
| 23 | +// A risk here is that we extend live ranges, e.g. we may use the default |
| 24 | +// value at the very end of a function, keeping that local alive throughout. |
| 25 | +// For that reason it is probably better to run this near the end of |
| 26 | +// optimization, and especially after coalesce-locals. A final vacuum |
| 27 | +// should be done after it, as this pass can leave behind drop()s of |
| 28 | +// values that are no longer necessary. |
| 29 | +// |
| 30 | +// So far this tracks constant values, and for everything else it considers |
| 31 | +// them unique (so each set_local of a non-constant is a unique value, each |
| 32 | +// merge is a unique value, etc.; there is no sophisticated value numbering |
| 33 | +// here). |
| 34 | +// |
| 35 | + |
| 36 | +#include <wasm.h> |
| 37 | +#include <pass.h> |
| 38 | +#include <wasm-builder.h> |
| 39 | +#include <cfg/cfg-traversal.h> |
| 40 | +#include <ir/literal-utils.h> |
| 41 | +#include <ir/utils.h> |
| 42 | +#include <support/unique_deferring_queue.h> |
| 43 | + |
| 44 | +namespace wasm { |
| 45 | + |
| 46 | +// We do a very simple numbering of local values, just a unique |
| 47 | +// number for constants so far, enough to see |
| 48 | +// trivial duplication. LocalValues maps each local index to |
| 49 | +// its current value |
| 50 | +typedef std::vector<Index> LocalValues; |
| 51 | + |
| 52 | +// information in a basic block |
| 53 | +struct Info { |
| 54 | + LocalValues start, end; // the local values at the start and end of the block |
| 55 | + std::vector<Expression**> setps; |
| 56 | +}; |
| 57 | + |
| 58 | +struct RedundantSetElimination : public WalkerPass<CFGWalker<RedundantSetElimination, Visitor<RedundantSetElimination>, Info>> { |
| 59 | + bool isFunctionParallel() override { return true; } |
| 60 | + |
| 61 | + Pass* create() override { return new RedundantSetElimination(); } |
| 62 | + |
| 63 | + Index numLocals; |
| 64 | + |
| 65 | + // cfg traversal work |
| 66 | + |
| 67 | + static void doVisitSetLocal(RedundantSetElimination* self, Expression** currp) { |
| 68 | + if (self->currBasicBlock) { |
| 69 | + self->currBasicBlock->contents.setps.push_back(currp); |
| 70 | + } |
| 71 | + } |
| 72 | + |
| 73 | + // main entry point |
| 74 | + |
| 75 | + void doWalkFunction(Function* func) { |
| 76 | + numLocals = func->getNumLocals(); |
| 77 | + // create the CFG by walking the IR |
| 78 | + CFGWalker<RedundantSetElimination, Visitor<RedundantSetElimination>, Info>::doWalkFunction(func); |
| 79 | + // flow values across blocks |
| 80 | + flowValues(func); |
| 81 | + // remove redundant sets |
| 82 | + optimize(); |
| 83 | + } |
| 84 | + |
| 85 | + // numbering |
| 86 | + |
| 87 | + Index nextValue = 1; // 0 is reserved for the "unseen value" |
| 88 | + std::unordered_map<Literal, Index> literalValues; // each constant has a value |
| 89 | + std::unordered_map<Expression*, Index> expressionValues; // each value can have a value |
| 90 | + std::unordered_map<BasicBlock*, std::unordered_map<Index, Index>> blockMergeValues; // each block has values for each merge |
| 91 | + |
| 92 | + Index getUnseenValue() { // we haven't seen this location yet |
| 93 | + return 0; |
| 94 | + } |
| 95 | + Index getUniqueValue() { |
| 96 | +#ifdef RSE_DEBUG |
| 97 | + std::cout << "new unique value " << nextValue << '\n'; |
| 98 | +#endif |
| 99 | + return nextValue++; |
| 100 | + } |
| 101 | + |
| 102 | + Index getLiteralValue(Literal lit) { |
| 103 | + auto iter = literalValues.find(lit); |
| 104 | + if (iter != literalValues.end()) { |
| 105 | + return iter->second; |
| 106 | + } |
| 107 | +#ifdef RSE_DEBUG |
| 108 | + std::cout << "new literal value for " << lit << '\n'; |
| 109 | +#endif |
| 110 | + return literalValues[lit] = getUniqueValue(); |
| 111 | + } |
| 112 | + |
| 113 | + Index getExpressionValue(Expression* expr) { |
| 114 | + auto iter = expressionValues.find(expr); |
| 115 | + if (iter != expressionValues.end()) { |
| 116 | + return iter->second; |
| 117 | + } |
| 118 | +#ifdef RSE_DEBUG |
| 119 | + std::cout << "new expr value for " << expr << '\n'; |
| 120 | +#endif |
| 121 | + return expressionValues[expr] = getUniqueValue(); |
| 122 | + } |
| 123 | + |
| 124 | + Index getBlockMergeValue(BasicBlock* block, Index index) { |
| 125 | + auto& mergeValues = blockMergeValues[block]; |
| 126 | + auto iter = mergeValues.find(index); |
| 127 | + if (iter != mergeValues.end()) { |
| 128 | + return iter->second; |
| 129 | + } |
| 130 | +#ifdef RSE_DEBUG |
| 131 | + std::cout << "new block-merge value for " << block << " : " << index << '\n'; |
| 132 | +#endif |
| 133 | + return mergeValues[index] = getUniqueValue(); |
| 134 | + } |
| 135 | + |
| 136 | + bool isBlockMergeValue(BasicBlock* block, Index index, Index value) { |
| 137 | + auto iter = blockMergeValues.find(block); |
| 138 | + if (iter == blockMergeValues.end()) return false; |
| 139 | + auto& mergeValues = iter->second; |
| 140 | + auto iter2 = mergeValues.find(index); |
| 141 | + if (iter2 == mergeValues.end()) return false; |
| 142 | + return value == iter2->second; |
| 143 | + } |
| 144 | + |
| 145 | + Index getValue(Expression* value, LocalValues& currValues) { |
| 146 | + if (auto* c = value->dynCast<Const>()) { |
| 147 | + // a constant |
| 148 | + return getLiteralValue(c->value); |
| 149 | + } else if (auto* get = value->dynCast<GetLocal>()) { |
| 150 | + // a copy of whatever that was |
| 151 | + return currValues[get->index]; |
| 152 | + } else { |
| 153 | + // get the value's own unique value |
| 154 | + return getExpressionValue(value); |
| 155 | + } |
| 156 | + } |
| 157 | + |
| 158 | + // flowing |
| 159 | + |
| 160 | + void flowValues(Function* func) { |
| 161 | + for (auto& block : basicBlocks) { |
| 162 | + LocalValues& start = block->contents.start; |
| 163 | + start.resize(numLocals); |
| 164 | + if (block.get() == entry) { |
| 165 | + // params are complex values we can't optimize; vars are zeros |
| 166 | + for (Index i = 0; i < numLocals; i++) { |
| 167 | + if (func->isParam(i)) { |
| 168 | +#ifdef RSE_DEBUG |
| 169 | + std::cout << "new param value for " << i << '\n'; |
| 170 | +#endif |
| 171 | + start[i] = getUniqueValue(); |
| 172 | + } else { |
| 173 | + start[i] = getLiteralValue(LiteralUtils::makeLiteralZero(func->getLocalType(i))); |
| 174 | + } |
| 175 | + } |
| 176 | + } else { |
| 177 | + // other blocks have all unseen values to begin with |
| 178 | + for (Index i = 0; i < numLocals; i++) { |
| 179 | + start[i] = getUnseenValue(); |
| 180 | + } |
| 181 | + } |
| 182 | + // the ends all begin unseen |
| 183 | + LocalValues& end = block->contents.end; |
| 184 | + end.resize(numLocals); |
| 185 | + for (Index i = 0; i < numLocals; i++) { |
| 186 | + end[i] = getUnseenValue(); |
| 187 | + } |
| 188 | + } |
| 189 | + // keep working while stuff is flowing. we use a unique deferred queue |
| 190 | + // which ensures both FIFO and that we don't do needless work - if |
| 191 | + // A and B reach C, and both queue C, we only want to do C at the latest |
| 192 | + // time, when we have information from all those reaching it. |
| 193 | + UniqueDeferredQueue<BasicBlock*> work; |
| 194 | + work.push(entry); |
| 195 | + while (!work.empty()) { |
| 196 | + auto* curr = work.pop(); |
| 197 | +#ifdef RSE_DEBUG |
| 198 | + std::cout << "flow block " << curr << '\n'; |
| 199 | +#endif |
| 200 | + // process a block: first, update its start based on those reaching it |
| 201 | + if (!curr->in.empty()) { |
| 202 | + if (curr->in.size() == 1) { |
| 203 | + // just copy the pred, nothing to merge |
| 204 | + curr->contents.start = (*curr->in.begin())->contents.end; |
| 205 | + } else { |
| 206 | + // perform a merge |
| 207 | + auto in = curr->in; |
| 208 | + for (Index i = 0; i < numLocals; i++) { |
| 209 | + auto old = curr->contents.start[i]; |
| 210 | + // If we already had a merge value here, keep it. |
| 211 | + // TODO This may have some false positives, as we may e.g. have |
| 212 | + // a single pred that first gives us x, then later y after |
| 213 | + // flow led to a merge, and we may see x and y at the same |
| 214 | + // time due to flow from a successor, and then it looks like |
| 215 | + // we need a merge but we don't. avoiding that would require |
| 216 | + // more memory and is probably not worth it, but might be |
| 217 | + // worth investigating |
| 218 | + // NB While suboptimal, this simplification provides a simple proof |
| 219 | + // of convergence. We prove that, in each fixed block+local, |
| 220 | + // the value number at the end is nondecreasing across |
| 221 | + // iterations, by induction on the iteration: |
| 222 | + // * The first iteration is on the entry block. It increases |
| 223 | + // the value number at the end from 0 (unseen) to something |
| 224 | + // else (a value number for 0 for locals, a unique value |
| 225 | + // for params; all >0). |
| 226 | + // * Induction step: assuming the property holds for all past |
| 227 | + // iterations, consider the current iteration. Of our |
| 228 | + // predecessors, those that we iterated on have the property; |
| 229 | + // those that we haven't will have 0 (unseen). |
| 230 | + // * If we assign to that local in this block, that will be |
| 231 | + // the value in the output, forever, and it is greater |
| 232 | + // than the initial value of 0. |
| 233 | + // * If we see different values coming in, we create a merge |
| 234 | + // value number. Its number is higher than everything |
| 235 | + // else since we give it the next available number, so we |
| 236 | + // do not decrease in this iteration, and we will output |
| 237 | + // the same value in the future too (here is where we use |
| 238 | + // the simplification property). |
| 239 | + // * Otherwise, we will flow the incoming value through, |
| 240 | + // and it did not decrease (by induction), so neither do |
| 241 | + // we. |
| 242 | + // Finally, given value numbers are nondecreasing, we must |
| 243 | + // converge since we only keep working as long as we see new |
| 244 | + // values at the end of a block. |
| 245 | + // |
| 246 | + // Not that we don't trust this proof, but the convergence |
| 247 | + // property (value numbers at block ends do not decrease) is |
| 248 | + // verified later down. |
| 249 | + if (isBlockMergeValue(curr, i, old)) { |
| 250 | + continue; |
| 251 | + } |
| 252 | + auto iter = in.begin(); |
| 253 | + auto value = (*iter)->contents.end[i]; |
| 254 | + iter++; |
| 255 | + while (iter != in.end()) { |
| 256 | + auto otherValue = (*iter)->contents.end[i]; |
| 257 | + if (value == getUnseenValue()) { |
| 258 | + value = otherValue; |
| 259 | + } else if (otherValue == getUnseenValue()) { |
| 260 | + // nothing to do, other has no information |
| 261 | + } else if (value != otherValue) { |
| 262 | + // 2 different values, this is a merged value |
| 263 | + value = getBlockMergeValue(curr, i); |
| 264 | + break; // no more work once we see a merge |
| 265 | + } |
| 266 | + iter++; |
| 267 | + } |
| 268 | + curr->contents.start[i] = value; |
| 269 | + } |
| 270 | + } |
| 271 | + } |
| 272 | +#ifdef RSE_DEBUG |
| 273 | + dump("start", curr->contents.start); |
| 274 | +#endif |
| 275 | + // flow values through it, then add those we can reach if they need an update. |
| 276 | + auto currValues = curr->contents.start; // we'll modify this as we go |
| 277 | + auto& setps = curr->contents.setps; |
| 278 | + for (auto** setp : setps) { |
| 279 | + auto* set = (*setp)->cast<SetLocal>(); |
| 280 | + currValues[set->index] = getValue(set->value, currValues); |
| 281 | + } |
| 282 | + if (currValues == curr->contents.end) { |
| 283 | + // nothing changed, so no more work to do |
| 284 | + // note that the first iteration this is always not the case, |
| 285 | + // since end contains unseen (and then the comparison ends on |
| 286 | + // the first element) |
| 287 | + continue; |
| 288 | + } |
| 289 | + // update the end state and update children |
| 290 | +#ifndef NDEBUG |
| 291 | + // verify the convergence property mentioned in the NB comment |
| 292 | + // above: the value numbers at the end must be nondecreasing |
| 293 | + for (Index i = 0; i < numLocals; i++) { |
| 294 | + assert(currValues[i] >= curr->contents.end[i]); |
| 295 | + } |
| 296 | +#endif |
| 297 | + curr->contents.end.swap(currValues); |
| 298 | +#ifdef RSE_DEBUG |
| 299 | + dump("end ", curr->contents.end); |
| 300 | +#endif |
| 301 | + for (auto* next : curr->out) { |
| 302 | + work.push(next); |
| 303 | + } |
| 304 | + } |
| 305 | + } |
| 306 | + |
| 307 | + // optimizing |
| 308 | + void optimize() { |
| 309 | + // in each block, run the values through the sets, |
| 310 | + // and remove redundant sets when we see them |
| 311 | + for (auto& block : basicBlocks) { |
| 312 | + auto currValues = block->contents.start; // we'll modify this as we go |
| 313 | + auto& setps = block->contents.setps; |
| 314 | + for (auto** setp : setps) { |
| 315 | + auto* set = (*setp)->cast<SetLocal>(); |
| 316 | + auto oldValue = currValues[set->index]; |
| 317 | + auto newValue = getValue(set->value, currValues); |
| 318 | + auto index = set->index; |
| 319 | + if (newValue == oldValue) { |
| 320 | + remove(setp); |
| 321 | + continue; // no more work to do |
| 322 | + } |
| 323 | + // update for later steps |
| 324 | + currValues[index] = newValue; |
| 325 | + } |
| 326 | + } |
| 327 | + } |
| 328 | + |
| 329 | + void remove(Expression** setp) { |
| 330 | + auto* set = (*setp)->cast<SetLocal>(); |
| 331 | + auto* value = set->value; |
| 332 | + if (!set->isTee()) { |
| 333 | + auto* drop = ExpressionManipulator::convert<SetLocal, Drop>(set); |
| 334 | + drop->value = value; |
| 335 | + drop->finalize(); |
| 336 | + } else { |
| 337 | + *setp = value; |
| 338 | + } |
| 339 | + } |
| 340 | + |
| 341 | + // debugging |
| 342 | + |
| 343 | + void dump(BasicBlock* block) { |
| 344 | + std::cout << "====\n"; |
| 345 | + if (block) { |
| 346 | + std::cout << "block: " << block << '\n'; |
| 347 | + for (auto* out : block->out) { |
| 348 | + std::cout << " goes to " << out << '\n'; |
| 349 | + } |
| 350 | + } |
| 351 | + for (Index i = 0; i < block->contents.start.size(); i++) { |
| 352 | + std::cout << " start[" << i << "] = " << block->contents.start[i] << '\n'; |
| 353 | + } |
| 354 | + for (auto** setp : block->contents.setps) { |
| 355 | + std::cout << " " << *setp << '\n'; |
| 356 | + } |
| 357 | + std::cout << "====\n"; |
| 358 | + } |
| 359 | + |
| 360 | + void dump(const char* desc, LocalValues& values) { |
| 361 | + std::cout << desc << ": "; |
| 362 | + for (auto x : values) { |
| 363 | + std::cout << x << ' '; |
| 364 | + } |
| 365 | + std::cout << '\n'; |
| 366 | + } |
| 367 | +}; |
| 368 | + |
| 369 | +Pass *createRedundantSetEliminationPass() { |
| 370 | + return new RedundantSetElimination(); |
| 371 | +} |
| 372 | + |
| 373 | +} // namespace wasm |
| 374 | + |
0 commit comments