Skip to content

Commit 6dd9248

Browse files
committed
feat: Enable data flow analysis through function return values
This commit extends the cross-function analysis and type propagation engines to track variables across function boundaries via return values. Previously, analysis was limited to parameter passing; this change enables tracking data flow in factory patterns, getters, and chained calls (e.g., `x = create(); return x->field;`).

include/structor/cross_function_analyzer.hpp:
- Added `find_return_sources` to identify which variables are returned by a function, including pointer offsets.
- Added `find_return_assignments` to locate where a function call's return value is assigned to a local variable.
- Added `find_callers_with_return` to map a callee to specific variables in its callers.

src/cross_function_analyzer.cpp:
- Implemented `ReturnSourceFinder` visitor to parse `cit_return` instructions and extract `var + delta` arithmetic.
- Implemented `ReturnAssignmentFinder` visitor to parse `var = call(...)` patterns.
- Updated `trace_forward` to descend into callees when the tracked variable is assigned a return value.
- Updated `trace_backward` to ascend to callers when the tracked variable is returned, correctly applying pointer deltas.

include/structor/type_propagator.hpp:
- Updated `propagate_forward` to apply types to variables returned by called functions.
- Updated `propagate_backward` to propagate types from a returned variable to the caller's assignment target.
- Added AST visitors specific to type propagation for handling return flow logic.

integration_tests/test_callgraph_return.c:
- Added a new test suite covering factory functions (`make_root`), offset returns (`make_sub`), and complex call chains to verify pointer arithmetic handling across boundaries.

Impact:
- Significantly improves structure recovery for object-oriented C code and factory patterns.
- Analysis will now traverse deeper into the call graph, potentially increasing analysis time but yielding higher fidelity type reconstruction.
- Correctly handles pointer arithmetic on returns (e.g., returning a pointer to a struct member), preventing type confusion in sub-structs.
1 parent 50495ef commit 6dd9248

File tree

4 files changed

+553
-1
lines changed

4 files changed

+553
-1
lines changed

include/structor/cross_function_analyzer.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,15 @@ class CrossFunctionAnalyzer {
287287
/// Get or decompile a function (with caching)
288288
[[nodiscard]] cfuncptr_t get_cfunc(ea_t func_ea);
289289

290+
/// Collect return sources as (var_idx, delta) pairs
291+
[[nodiscard]] qvector<std::pair<int, sval_t>> find_return_sources(cfunc_t* cfunc);
292+
293+
/// Find assignments of call return values in the current function
294+
[[nodiscard]] qvector<std::pair<ea_t, int>> find_return_assignments(cfunc_t* cfunc);
295+
296+
/// Find callers that assign this function's return to a variable
297+
[[nodiscard]] qvector<std::pair<ea_t, int>> find_callers_with_return(ea_t func_ea);
298+
290299
/// Internal function cache to avoid redundant decompilation
291300
std::unordered_map<ea_t, cfuncptr_t> cfunc_cache_;
292301
};

include/structor/type_propagator.hpp

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,14 @@ class TypePropagator {
7171
int var_idx,
7272
qvector<std::pair<ea_t, int>>& sources);
7373

74+
void find_return_sources(
75+
cfunc_t* cfunc,
76+
qvector<std::pair<int, sval_t>>& sources);
77+
78+
void find_callers_with_return(
79+
ea_t func_ea,
80+
qvector<std::pair<ea_t, int>>& callers);
81+
7482
[[nodiscard]] bool is_parameter(cfunc_t* cfunc, int var_idx);
7583
[[nodiscard]] int get_param_index(cfunc_t* cfunc, int var_idx);
7684

@@ -249,6 +257,45 @@ inline void TypePropagator::propagate_forward(
249257
result.add_failure(std::move(site));
250258
}
251259
}
260+
261+
// Propagate through return-value assignments: var = callee()
262+
qvector<std::pair<ea_t, int>> return_sources;
263+
find_assigned_from(cfunc, var_idx, return_sources);
264+
for (const auto& [callee_ea, ret_marker] : return_sources) {
265+
(void)ret_marker;
266+
cfuncptr_t callee_cfunc = utils::get_cfunc(callee_ea);
267+
if (!callee_cfunc) continue;
268+
269+
qvector<std::pair<int, sval_t>> return_vars;
270+
find_return_sources(callee_cfunc, return_vars);
271+
272+
for (const auto& [return_var_idx, return_delta] : return_vars) {
273+
(void)return_delta;
274+
auto key = make_visit_key(callee_ea, return_var_idx);
275+
if (visited_.count(key)) continue;
276+
visited_.insert(key);
277+
278+
PropagationSite site;
279+
site.func_ea = callee_ea;
280+
site.var_idx = return_var_idx;
281+
site.new_type = type;
282+
site.direction = PropagationDirection::Forward;
283+
284+
lvars_t& callee_lvars = *callee_cfunc->get_lvars();
285+
if (return_var_idx >= 0 && static_cast<size_t>(return_var_idx) < callee_lvars.size()) {
286+
site.var_name = callee_lvars[return_var_idx].name;
287+
site.old_type = callee_lvars[return_var_idx].type();
288+
}
289+
290+
if (apply_type(callee_cfunc, return_var_idx, type)) {
291+
result.add_success(std::move(site));
292+
propagate_forward(callee_ea, return_var_idx, type, depth + 1, result);
293+
} else {
294+
site.failure_reason = "Failed to apply type";
295+
result.add_failure(std::move(site));
296+
}
297+
}
298+
}
252299
}
253300

254301
inline void TypePropagator::propagate_backward(
@@ -263,6 +310,51 @@ inline void TypePropagator::propagate_backward(
263310
cfuncptr_t cfunc = utils::get_cfunc(func_ea);
264311
if (!cfunc) return;
265312

313+
// Propagate through return-value assignments: caller_var = func()
314+
qvector<std::pair<int, sval_t>> return_vars;
315+
find_return_sources(cfunc, return_vars);
316+
for (const auto& [return_var_idx, return_delta] : return_vars) {
317+
if (return_var_idx != var_idx) continue;
318+
(void)return_delta;
319+
320+
qvector<std::pair<ea_t, int>> callers;
321+
find_callers_with_return(func_ea, callers);
322+
323+
for (const auto& [caller_ea, caller_var_idx] : callers) {
324+
auto key = make_visit_key(caller_ea, caller_var_idx);
325+
if (visited_.count(key)) continue;
326+
visited_.insert(key);
327+
328+
cfuncptr_t caller_cfunc = utils::get_cfunc(caller_ea);
329+
if (!caller_cfunc) continue;
330+
331+
PropagationSite site;
332+
site.func_ea = caller_ea;
333+
site.var_idx = caller_var_idx;
334+
site.new_type = type;
335+
site.direction = PropagationDirection::Backward;
336+
337+
lvars_t& caller_lvars = *caller_cfunc->get_lvars();
338+
if (caller_var_idx >= 0 && static_cast<size_t>(caller_var_idx) < caller_lvars.size()) {
339+
site.var_name = caller_lvars[caller_var_idx].name;
340+
site.old_type = caller_lvars[caller_var_idx].type();
341+
}
342+
343+
if (apply_type(caller_cfunc, caller_var_idx, type)) {
344+
result.add_success(std::move(site));
345+
346+
// Continue backward propagation
347+
propagate_backward(caller_ea, caller_var_idx, type, depth + 1, result);
348+
349+
// Also propagate forward to reach siblings
350+
propagate_forward(caller_ea, caller_var_idx, type, depth + 1, result);
351+
} else {
352+
site.failure_reason = "Failed to apply type";
353+
result.add_failure(std::move(site));
354+
}
355+
}
356+
}
357+
266358
// Check if this is a parameter
267359
if (!is_parameter(cfunc, var_idx)) return;
268360

@@ -542,6 +634,110 @@ inline void TypePropagator::find_assigned_from(
542634
visitor.apply_to(&cfunc->body, nullptr);
543635
}
544636

637+
/// Collect the local variables that a decompiled function returns.
///
/// Every `return` statement in the function body is inspected. When the
/// returned expression is recognised by `utils::extract_ptr_arith` as being
/// based on a local variable, the pair (variable index, offset) is appended
/// to `sources`. A pair already present in `sources` is not added again.
///
/// @param cfunc    Decompiled function to scan; a null pointer is a no-op.
/// @param sources  Output list of (var_idx, offset) pairs; existing entries
///                 are kept and take part in the duplicate check.
inline void TypePropagator::find_return_sources(
    cfunc_t* cfunc,
    qvector<std::pair<int, sval_t>>& sources)
{
    if (cfunc == nullptr) {
        return;
    }

    struct ReturnVisitor : public ctree_visitor_t {
        qvector<std::pair<int, sval_t>>& sources;

        ReturnVisitor(qvector<std::pair<int, sval_t>>& out)
            : ctree_visitor_t(CV_FAST)
            , sources(out) {}

        int idaapi visit_insn(cinsn_t* insn) override {
            // Only `return <expr>;` statements are of interest.
            if (insn == nullptr || insn->op != cit_return || insn->creturn == nullptr) {
                return 0;
            }

            cexpr_t& returned = insn->creturn->expr;
            if (returned.op == cot_empty) {
                return 0;  // plain `return;` carries no value
            }

            const auto info = utils::extract_ptr_arith(&returned);
            if (!info.valid || info.var_idx < 0) {
                return 0;
            }

            // Record each (variable, offset) combination only once.
            bool already_recorded = false;
            for (const auto& [known_idx, known_offset] : sources) {
                if (known_idx == info.var_idx && known_offset == info.offset) {
                    already_recorded = true;
                    break;
                }
            }

            if (!already_recorded) {
                sources.push_back({info.var_idx, info.offset});
            }
            return 0;
        }
    };

    ReturnVisitor collector(sources);
    collector.apply_to(&cfunc->body, nullptr);
}
674+
675+
/// Find the caller-side local variables that receive this function's
/// return value.
///
/// Each function reported by `utils::get_callers(func_ea)` that can be
/// decompiled is scanned for assignments of the form `var = func(...)`,
/// where the callee is a direct reference to `func_ea`. Casts wrapped around
/// either side of the assignment are ignored. For every match the pair
/// (caller address, variable index) is appended to `callers`; the same pair
/// may be appended more than once if the caller contains several matching
/// assignments.
///
/// Only assignments whose destination is the local variable itself are
/// reported. Stores into something derived from a variable
/// (`p->field = func()`, `s.field = func()`, `arr[i] = func()`) are skipped
/// on purpose: in those statements the base variable does not hold the
/// returned value, so reporting it would make backward propagation apply the
/// callee's return type to the wrong variable.
///
/// @param func_ea  Address of the callee whose return value is tracked.
/// @param callers  Output list of (caller_ea, var_idx) pairs.
inline void TypePropagator::find_callers_with_return(
    ea_t func_ea,
    qvector<std::pair<ea_t, int>>& callers)
{
    qvector<ea_t> caller_funcs = utils::get_callers(func_ea);

    for (ea_t caller_ea : caller_funcs) {
        cfuncptr_t caller_cfunc = utils::get_cfunc(caller_ea);
        if (!caller_cfunc) continue;

        struct ReturnCallerFinder : public ctree_visitor_t {
            ea_t target_func;
            ea_t caller_ea;
            qvector<std::pair<ea_t, int>>& results;

            ReturnCallerFinder(ea_t func, ea_t caller, qvector<std::pair<ea_t, int>>& r)
                : ctree_visitor_t(CV_FAST)
                , target_func(func)
                , caller_ea(caller)
                , results(r) {}

            /// Peel any number of casts off an expression.
            static cexpr_t* strip_casts(cexpr_t* expr) {
                while (expr && expr->op == cot_cast) {
                    expr = expr->x;
                }
                return expr;
            }

            int idaapi visit_expr(cexpr_t* expr) override {
                if (!expr || expr->op != cot_asg) return 0;

                // Right-hand side must be a direct call to the tracked function.
                cexpr_t* rhs = strip_casts(expr->y);
                if (!rhs || rhs->op != cot_call || !rhs->x) return 0;
                if (rhs->x->op != cot_obj || rhs->x->obj_ea != target_func) return 0;

                // Left-hand side must be the variable itself. Field, member
                // and index stores write the value somewhere else and must
                // not be attributed to their base variable.
                cexpr_t* lhs = strip_casts(expr->x);
                if (!lhs || lhs->op != cot_var) return 0;

                results.push_back({caller_ea, lhs->v.idx});
                return 0;
            }
        };

        ReturnCallerFinder finder(func_ea, caller_ea, callers);
        finder.apply_to(&caller_cfunc->body, nullptr);
    }
}
740+
545741
inline bool TypePropagator::is_parameter(cfunc_t* cfunc, int var_idx) {
546742
if (!cfunc || var_idx < 0) return false;
547743

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#include <stdint.h>
2+
#include <stdio.h>
3+
4+
// Global accumulator that the reader functions in this file XOR their loaded
// values into. Declared volatile, so each access is a real memory access.
static volatile uint64_t sink;
5+
6+
// Byte offsets of the fields inside the root buffer; the trailing comment on
// each macro names the type used to access that offset.
#define OFF_MAGIC 0x00 // uint64_t
7+
#define OFF_FLAGS 0x08 // uint32_t / float (intentional conflict)
8+
#define OFF_MODE 0x0C // uint16_t
9+
#define OFF_SCORE 0x10 // double
10+
#define OFF_KIND 0x18 // uint8_t
11+
#define OFF_SELF 0x20 // void*
12+
#define OFF_COUNT 0x28 // uint32_t
13+
#define OFF_ARR0 0x30 // uint64_t[3]
14+
// Second element of the uint64_t array that starts at OFF_ARR0.
#define OFF_ARR1 0x38
15+
// Third element of the uint64_t array that starts at OFF_ARR0.
#define OFF_ARR2 0x40
16+
#define OFF_CRC 0x48 // uint32_t
17+
#define OFF_BYTES 0x50 // uint8_t[8]
18+
#define OFF_SUB 0x60 // sub-struct base
19+
20+
// Byte offsets of the fields inside the sub-structure, relative to its own
// base (root base + OFF_SUB).
#define SUB_TAG 0x00 // uint32_t
21+
#define SUB_VALUE 0x08 // uint64_t
22+
#define SUB_STATE 0x10 // uint8_t
23+
24+
// Fill the buffer at p with a fixed test pattern.
//
// Stores one value at each OFF_* offset using the access type shown in the
// casts below, sets the first four bytes at OFF_BYTES, and then writes the
// three SUB_* fields of the sub-structure that starts at p + OFF_SUB.
// The highest byte written is OFF_SUB + SUB_STATE (0x70), so p must refer to
// at least 0x71 writable bytes. noinline asks the compiler not to inline
// this function into its callers.
__attribute__((noinline))
25+
void init_root(void *p) {
26+
uint8_t *b = (uint8_t *)p;
27+
*(uint64_t *)(b + OFF_MAGIC) = 0x1122334455667788ULL;
28+
*(uint32_t *)(b + OFF_FLAGS) = 0xAABBCCDDU;
29+
*(uint16_t *)(b + OFF_MODE) = 0x3344U;
30+
*(double *)(b + OFF_SCORE) = 3.14159;
31+
*(uint8_t *)(b + OFF_KIND) = 0x7FU;
32+
// Self-reference: the field stores the buffer's own address.
*(void **)(b + OFF_SELF) = p;
33+
*(uint32_t *)(b + OFF_COUNT) = 3U;
34+
35+
// Three consecutive 64-bit slots (OFF_ARR0..OFF_ARR2).
*(uint64_t *)(b + OFF_ARR0) = 0x1111111111111111ULL;
36+
*(uint64_t *)(b + OFF_ARR1) = 0x2222222222222222ULL;
37+
*(uint64_t *)(b + OFF_ARR2) = 0x3333333333333333ULL;
38+
39+
*(uint32_t *)(b + OFF_CRC) = 0xDEADBEAFU;
40+
41+
// Byte-wise stores; only indices 0..3 of the byte area are written here.
b[OFF_BYTES + 0] = 0x10;
42+
b[OFF_BYTES + 1] = 0x20;
43+
b[OFF_BYTES + 2] = 0x30;
44+
b[OFF_BYTES + 3] = 0x40;
45+
46+
// Sub-struct at base + OFF_SUB
47+
uint8_t *s = b + OFF_SUB;
48+
*(uint32_t *)(s + SUB_TAG) = 0x1234U;
49+
*(uint64_t *)(s + SUB_VALUE) = 0xCAFEBABECAFED00DULL;
50+
*(uint8_t *)(s + SUB_STATE) = 0x5AU;
51+
}
52+
53+
// Read several root fields from the buffer at p and XOR them into sink.
//
// Loads OFF_MAGIC and OFF_ARR1 as uint64_t, OFF_FLAGS and OFF_CRC as
// uint32_t, OFF_MODE as uint16_t and OFF_KIND as uint8_t. Nothing is written
// to the buffer. noinline asks the compiler not to inline this function.
__attribute__((noinline))
54+
void process_root(void *p) {
55+
uint8_t *b = (uint8_t *)p;
56+
sink ^= *(uint64_t *)(b + OFF_MAGIC);
57+
sink ^= *(uint32_t *)(b + OFF_FLAGS);
58+
sink ^= *(uint16_t *)(b + OFF_MODE);
59+
sink ^= (uint64_t)(*(uint8_t *)(b + OFF_KIND));
60+
sink ^= *(uint64_t *)(b + OFF_ARR1);
61+
sink ^= *(uint32_t *)(b + OFF_CRC);
62+
}
63+
64+
// Read the value at OFF_FLAGS as a float and XOR its bit pattern into sink.
//
// Other functions in this file access the same offset as uint32_t, so this
// reader gives the offset a second, conflicting access type.
__attribute__((noinline))
65+
void conflict_reader(void *p) {
66+
uint8_t *b = (uint8_t *)p;
67+
float f = *(float *)(b + OFF_FLAGS);
68+
// Reinterpret the float's storage as uint32_t: the raw bits, not a
// numeric conversion, are folded into sink.
sink ^= (uint64_t)(*(uint32_t *)&f);
69+
}
70+
71+
// Read two root fields from the buffer at p and XOR them into sink: the
// single byte at OFF_BYTES + 1 and the uint64_t at OFF_ARR2.
__attribute__((noinline))
72+
void sibling_reader(void *p) {
73+
uint8_t *b = (uint8_t *)p;
74+
sink ^= (uint64_t)(*(uint8_t *)(b + OFF_BYTES + 1));
75+
sink ^= *(uint64_t *)(b + OFF_ARR2);
76+
}
77+
78+
// Read the three sub-structure fields and XOR them into sink.
//
// sub is treated as the base of the sub-structure itself: offsets are the
// SUB_* constants (SUB_TAG as uint32_t, SUB_VALUE as uint64_t, SUB_STATE as
// uint8_t), with no OFF_SUB added here.
__attribute__((noinline))
79+
void process_sub(void *sub) {
80+
uint8_t *s = (uint8_t *)sub;
81+
sink ^= *(uint32_t *)(s + SUB_TAG);
82+
sink ^= *(uint64_t *)(s + SUB_VALUE);
83+
sink ^= (uint64_t)(*(uint8_t *)(s + SUB_STATE));
84+
}
85+
86+
// Forward a root pointer to two callees: process_root receives p unchanged,
// process_sub receives p advanced by OFF_SUB bytes.
__attribute__((noinline))
87+
void wrapper_chain(void *p) {
88+
process_root(p);
89+
process_sub((uint8_t *)p + OFF_SUB);
90+
}
91+
92+
// Copy the parameter into a second local pointer and pass that copy to
// process_root, so the callee is reached through an alias of p.
__attribute__((noinline))
93+
void alias_forward(void *p) {
94+
void *alias = p;
95+
process_root(alias);
96+
}
97+
98+
// Factory-style function: initialise a root buffer and return its address.
//
// The buffer is a function-local static array of 0x80 bytes, so every call
// re-runs init_root on, and returns a pointer to, the same storage.
__attribute__((noinline))
99+
void *make_root(void) {
100+
static uint8_t storage[0x80];
101+
init_root(storage);
102+
return storage;
103+
}
104+
105+
// Return the address OFF_SUB bytes past p, i.e. the sub-structure base for a
// root buffer that starts at p. No memory is read or written.
__attribute__((noinline))
106+
void *make_sub(void *p) {
107+
return (uint8_t *)p + OFF_SUB;
108+
}
109+
110+
// Exercise pointers that originate from return values rather than from
// parameters.
//
// The root pointer returned by make_root is passed to process_root,
// conflict_reader and sibling_reader; the pointer returned by make_sub(p) is
// then passed to process_sub.
__attribute__((noinline))
111+
void chain_from_return(void) {
112+
void *p = make_root();
113+
process_root(p);
114+
conflict_reader(p);
115+
sibling_reader(p);
116+
117+
// Offset return: sub is p + OFF_SUB as computed inside make_sub.
void *sub = make_sub(p);
118+
process_sub(sub);
119+
}
120+
121+
// Entry point: initialise a 0x80-byte stack buffer, run the parameter-based
// call chains on it, run the return-value-based chain, and print the final
// value of sink in hexadecimal. Always returns 0.
int main(void) {
122+
uint8_t buf[0x80];
123+
124+
// Address-of pass to exercise cot_ref argument matching.
125+
init_root(&buf);
126+
127+
// The remaining calls pass the array name directly.
wrapper_chain(buf);
128+
sibling_reader(buf);
129+
alias_forward(buf);
130+
conflict_reader(buf);
131+
132+
// Uses make_root's static storage, not buf.
chain_from_return();
133+
134+
printf("sink=%llx\n", (unsigned long long)sink);
135+
return 0;
136+
}

0 commit comments

Comments
 (0)