Skip to content

Commit 64e5a99

Browse files
authored
[analysis] Add a new iterable CFG utility (#5712)
Add a new "analysis" source directory that will contain the source for a new static program analysis framework. To start the framework, add a CFG utility that provides convenient iterators for iterating through the basic blocks of the CFG as well as the predecessors, successors, and contents of each block. The new CFGs are constructed using the existing CFGWalker, but they are different in that the new utility is meant to provide a usable representation of a CFG whereas CFGWalker is meant to allow collecting arbitrary information about each basic block in a CFG. For testing and debugging purposes, add `print` methods to CFGs and basic blocks. This requires exposing the ability to print expression contents excluding children, which was something we previously did only for StackIR. Also add a new gtest file with a test for constructing and printing a CFG. The test reveals some strange properties of the current CFG construction, including empty blocks and strange placement of `loop` instructions, but fixing these problems is left as future work.
1 parent 7d5d24f commit 64e5a99

File tree

9 files changed

+441
-3
lines changed

9 files changed

+441
-3
lines changed

CMakeLists.txt

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,7 @@ add_subdirectory(src/emscripten-optimizer)
359359
add_subdirectory(src/passes)
360360
add_subdirectory(src/support)
361361
add_subdirectory(src/wasm)
362+
add_subdirectory(src/analysis)
362363

363364
if(BUILD_TOOLS)
364365
# Build binaryen tools
@@ -383,7 +384,8 @@ set(binaryen_objs
383384
$<TARGET_OBJECTS:emscripten-optimizer>
384385
$<TARGET_OBJECTS:ir>
385386
$<TARGET_OBJECTS:cfg>
386-
$<TARGET_OBJECTS:support>)
387+
$<TARGET_OBJECTS:support>
388+
$<TARGET_OBJECTS:analysis>)
387389

388390
if(BUILD_LLVM_DWARF)
389391
SET(binaryen_objs ${binaryen_objs} $<TARGET_OBJECTS:llvm_dwarf>)
@@ -430,7 +432,7 @@ if(EMSCRIPTEN)
430432
# binaryen.js WebAssembly variant
431433
add_executable(binaryen_wasm
432434
${binaryen_emscripten_SOURCES})
433-
target_link_libraries(binaryen_wasm wasm asmjs emscripten-optimizer passes ir cfg support wasm)
435+
target_link_libraries(binaryen_wasm wasm asmjs emscripten-optimizer passes ir cfg support analysis wasm)
434436
target_link_libraries(binaryen_wasm "-sFILESYSTEM")
435437
target_link_libraries(binaryen_wasm "-sEXPORT_NAME=Binaryen")
436438
target_link_libraries(binaryen_wasm "-sNODERAWFS=0")
@@ -451,7 +453,7 @@ if(EMSCRIPTEN)
451453
# binaryen.js JavaScript variant
452454
add_executable(binaryen_js
453455
${binaryen_emscripten_SOURCES})
454-
target_link_libraries(binaryen_js wasm asmjs emscripten-optimizer passes ir cfg support wasm)
456+
target_link_libraries(binaryen_js wasm asmjs emscripten-optimizer passes ir cfg support analysis wasm)
455457
target_link_libraries(binaryen_js "-sWASM=0")
456458
target_link_libraries(binaryen_js "-sWASM_ASYNC_COMPILATION=0")
457459
if(${CMAKE_CXX_COMPILER_VERSION} STREQUAL "6.0.1")

src/analysis/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Build the analysis sources as an OBJECT library named `analysis`. Every
# header in this directory is listed alongside cfg.cpp as a source of the
# target.
file(GLOB analysis_HEADERS *.h)
set(analysis_SOURCES cfg.cpp ${analysis_HEADERS})
add_library(analysis OBJECT ${analysis_SOURCES})

src/analysis/cfg-impl.h

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/*
2+
* Copyright 2023 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#ifndef wasm_analysis_cfg_impl_h
18+
#define wasm_analysis_cfg_impl_h
19+
20+
#include <cstddef>
#include <iterator>
#include <vector>

#include "cfg.h"
21+
22+
namespace wasm::analysis {
23+
24+
// A random-access iterator over a contiguous sequence of pointers. It is
// represented as a pointer to an element of that sequence, and dereferencing it
// yields the object the current element points to rather than the element
// itself.
// TODO: Move this to its own public header if there is ever another use for it.
template<typename T> struct _indirect_ptr_iterator {
  // Member types read by std::iterator_traits. All five must be present and
  // spelled exactly like this; otherwise the traits are empty and standard
  // algorithms such as std::distance cannot be used with this iterator.
  using iterator_category = std::random_access_iterator_tag;
  using value_type = T;
  using difference_type = std::ptrdiff_t;
  using reference = const T&;
  using pointer = const T*;

  // Current position in the underlying sequence of pointers.
  const T* const* ptr;

  // Element access: all of these look through the stored pointer.
  reference operator*() const { return **ptr; }

  pointer operator->() const { return *ptr; }

  reference operator[](difference_type n) const { return **(ptr + n); }

  // Arithmetic: moves the position by `n` elements (negative `n` is allowed).
  _indirect_ptr_iterator& operator+=(difference_type n) {
    ptr += n;
    return *this;
  }

  _indirect_ptr_iterator& operator-=(difference_type n) {
    ptr -= n;
    return *this;
  }

  _indirect_ptr_iterator& operator++() { return *this += 1; }

  _indirect_ptr_iterator operator++(int) {
    _indirect_ptr_iterator it = *this;
    ++(*this);
    return it;
  }

  _indirect_ptr_iterator& operator--() { return *this -= 1; }

  _indirect_ptr_iterator operator--(int) {
    _indirect_ptr_iterator it = *this;
    --(*this);
    return it;
  }

  _indirect_ptr_iterator operator+(difference_type n) const {
    _indirect_ptr_iterator it = *this;
    it += n;
    return it;
  }

  _indirect_ptr_iterator operator-(difference_type n) const {
    _indirect_ptr_iterator it = *this;
    it -= n;
    return it;
  }

  // Distance in elements from `other` to this iterator. Both iterators must
  // point into the same sequence.
  difference_type operator-(const _indirect_ptr_iterator& other) const {
    return ptr - other.ptr;
  }

  // Comparisons are by position in the sequence, never by pointed-to object.
  bool operator==(const _indirect_ptr_iterator& other) const {
    return ptr == other.ptr;
  }

  bool operator!=(const _indirect_ptr_iterator& other) const {
    return !(*this == other);
  }

  bool operator<(const _indirect_ptr_iterator& other) const {
    return ptr < other.ptr;
  }

  bool operator>(const _indirect_ptr_iterator& other) const {
    return ptr > other.ptr;
  }

  bool operator<=(const _indirect_ptr_iterator& other) const {
    return ptr <= other.ptr;
  }

  bool operator>=(const _indirect_ptr_iterator& other) const {
    return ptr >= other.ptr;
  }
};
102+
103+
template<typename T>
104+
_indirect_ptr_iterator<T> operator+(int n,
105+
const _indirect_ptr_iterator<T>& it) {
106+
return it + n;
107+
}
108+
109+
// Wraps a vector of pointers and provides dereferencing iterators for it.
110+
template<typename T> struct _indirect_ptr_vec {
111+
using iterator = _indirect_ptr_iterator<T>;
112+
113+
const std::vector<T*>& vec;
114+
115+
_indirect_ptr_vec(const std::vector<T*>& vec) : vec(vec) {}
116+
117+
iterator begin() const { return {&vec.data()[0]}; }
118+
iterator end() const { return {&vec.data()[vec.size()]}; }
119+
};
120+
121+
// Iterable over the predecessors of a basic block. It refers to the block's
// own predecessor list, so it must not outlive that block.
struct BasicBlock::Predecessors : _indirect_ptr_vec<BasicBlock> {
  Predecessors(const BasicBlock& block)
    : _indirect_ptr_vec<BasicBlock>(block.predecessors) {}
};
125+
126+
// Iterable over the successors of a basic block. It refers to the block's own
// successor list, so it must not outlive that block.
struct BasicBlock::Successors : _indirect_ptr_vec<BasicBlock> {
  Successors(const BasicBlock& block)
    : _indirect_ptr_vec<BasicBlock>(block.successors) {}
};
129+
130+
// Returns an iterable over this block's predecessors.
inline BasicBlock::Predecessors BasicBlock::preds() const {
  const BasicBlock& self = *this;
  return Predecessors(self);
}
133+
134+
// Returns an iterable over this block's successors.
inline BasicBlock::Successors BasicBlock::succs() const {
  const BasicBlock& self = *this;
  return Successors(self);
}
137+
138+
} // namespace wasm::analysis
139+
140+
#endif // wasm_analysis_cfg_impl_h

src/analysis/cfg.cpp

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/*
2+
* Copyright 2023 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <unordered_map>
18+
19+
#include "analysis/cfg.h"
20+
#include "cfg/cfg-traversal.h"
21+
#include "wasm-stack.h"
22+
23+
namespace wasm::analysis {
24+
25+
// Builds a CFG for `func`. The function is walked with a CFGWalker to discover
// its basic blocks, and the walker's blocks are then translated one-to-one (in
// the same order) into this utility's BasicBlock representation.
CFG CFG::fromFunction(Function* func) {
  // A CFGWalker whose per-block payload is the list of expressions visited
  // while that block was the current one.
  struct CFGBuilder : CFGWalker<CFGBuilder,
                                UnifiedExpressionVisitor<CFGBuilder>,
                                std::vector<Expression*>> {
    void visitExpression(Expression* curr) {
      // Expressions visited while there is no current block are not recorded.
      if (!currBasicBlock) {
        return;
      }
      currBasicBlock->contents.push_back(curr);
    }
  };

  CFGBuilder walker;
  walker.walkFunction(func);

  auto& oldBlocks = walker.basicBlocks;
  const size_t count = oldBlocks.size();

  CFG result;
  result.blocks = std::vector<BasicBlock>(count);

  // The block vector is not resized after this point, so the addresses of the
  // new basic blocks are stable and can be stored as edge targets.
  std::unordered_map<CFGBuilder::BasicBlock*, BasicBlock*> newBlockFor;
  for (size_t i = 0; i < count; ++i) {
    newBlockFor[oldBlocks[i].get()] = &result.blocks[i];
  }

  // Translates a list of walker blocks into the corresponding new blocks,
  // appending them to `to` in the same order.
  auto remap = [&newBlockFor](const auto& from, std::vector<BasicBlock*>& to) {
    to.reserve(from.size());
    for (auto* old : from) {
      to.push_back(newBlockFor.at(old));
    }
  };

  for (size_t i = 0; i < count; ++i) {
    auto& source = *oldBlocks[i];
    auto& target = result.blocks[i];
    target.index = i;
    target.insts = std::move(source.contents);
    remap(source.in, target.predecessors);
    remap(source.out, target.successors);
  }

  // Return a move-constructed prvalue so that copy elision is mandatory,
  // preserving basic block addresses through the return.
  return CFG(std::move(result));
}
69+
70+
void CFG::print(std::ostream& os, Module* wasm) const {
71+
size_t start = 0;
72+
for (auto& block : *this) {
73+
if (&block != &*begin()) {
74+
os << '\n';
75+
}
76+
block.print(os, wasm, start);
77+
start += block.size();
78+
}
79+
}
80+
81+
void BasicBlock::print(std::ostream& os, Module* wasm, size_t start) const {
82+
os << ";; preds: [";
83+
for (auto& pred : preds()) {
84+
if (&pred != &*preds().begin()) {
85+
os << ", ";
86+
}
87+
os << pred.index;
88+
}
89+
os << "], succs: [";
90+
91+
for (auto& succ : succs()) {
92+
if (&succ != &*succs().begin()) {
93+
os << ", ";
94+
}
95+
os << succ.index;
96+
}
97+
os << "]\n";
98+
99+
os << index << ":\n";
100+
size_t instIndex = start;
101+
for (auto* inst : *this) {
102+
os << " " << instIndex++ << ": " << ShallowExpression{inst, wasm} << '\n';
103+
}
104+
}
105+
106+
} // namespace wasm::analysis

src/analysis/cfg.h

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Copyright 2023 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
// A generic CFG / basic block utility. Unlike the utilities in src/cfg/, this
18+
// is a generic representation of a CFG rather than a generic builder of
19+
// CFG-like objects. It lives here in src/analysis/ because it is primarily
20+
// meant for use in the static analysis framework. Other Binaryen code will find
21+
// it more idiomatic to use the utilities in src/cfg/.
22+
23+
#ifndef wasm_analysis_cfg_h
24+
#define wasm_analysis_cfg_h
25+
26+
#include <iostream>
27+
#include <vector>
28+
29+
#include "wasm.h"
30+
31+
namespace wasm::analysis {
32+
33+
struct BasicBlock;
34+
35+
struct CFG {
36+
// Iterate through basic blocks.
37+
using iterator = std::vector<BasicBlock>::const_iterator;
38+
iterator begin() const { return blocks.cbegin(); }
39+
iterator end() const { return blocks.cend(); }
40+
size_t size() const { return blocks.size(); }
41+
42+
static CFG fromFunction(Function* func);
43+
44+
void print(std::ostream& os, Module* wasm = nullptr) const;
45+
46+
private:
47+
std::vector<BasicBlock> blocks;
48+
friend BasicBlock;
49+
};
50+
51+
struct BasicBlock {
52+
using iterator = std::vector<Expression*>::const_iterator;
53+
54+
// Iterate through instructions.
55+
iterator begin() const { return insts.cbegin(); }
56+
iterator end() const { return insts.cend(); }
57+
size_t size() const { return insts.size(); }
58+
59+
// Iterables for predecessor and successor blocks.
60+
struct Predecessors;
61+
struct Successors;
62+
Predecessors preds() const;
63+
Successors succs() const;
64+
65+
void print(std::ostream& os, Module* wasm = nullptr, size_t start = 0) const;
66+
67+
private:
68+
Index index;
69+
std::vector<Expression*> insts;
70+
std::vector<BasicBlock*> predecessors;
71+
std::vector<BasicBlock*> successors;
72+
friend CFG;
73+
};
74+
75+
} // namespace wasm::analysis
76+
77+
#include "cfg-impl.h"
78+
79+
#endif // wasm_analysis_cfg_h

src/passes/Print.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3776,6 +3776,17 @@ std::ostream& operator<<(std::ostream& o, wasm::ModuleExpression pair) {
37763776
return wasm::printExpression(pair.second, o, false, false, &pair.first);
37773777
}
37783778

3779+
// Prints `expression.expr` with a PrintExpressionContents visitor. The visitor
// is constructed with the module when one was supplied and without it
// otherwise.
std::ostream& operator<<(std::ostream& o, wasm::ShallowExpression expression) {
  if (!expression.module) {
    wasm::PrintExpressionContents printer(nullptr, o);
    printer.visit(expression.expr);
    return o;
  }
  wasm::PrintExpressionContents printer(expression.module, nullptr, o);
  printer.visit(expression.expr);
  return o;
}
3789+
37793790
std::ostream& operator<<(std::ostream& o, wasm::StackInst& inst) {
37803791
return wasm::printStackInst(&inst, o);
37813792
}

0 commit comments

Comments
 (0)