Skip to content

Commit 4718401

Browse files
committed
feat: Add automatic type inference and SIMD-accelerated core algorithms
This commit introduces a major capability to automatically refine variable types during decompilation and overhauls the core algorithmic foundation with SIMD acceleration and cache-friendly data structures to support real-time analysis. Primary changes: include/structor/type_fixer.hpp (new): - Implemented TypeFixer to analyze variable access patterns in Hex-Rays cfuncs. - Detects and repairs generic types (void* -> struct*, int -> pointer). - Configurable thresholds for applying fixes based on inference confidence. include/structor/simd.hpp (new): - Added portable SIMD abstraction layer supporting AVX-512, AVX2, SSE4.1, and NEON. - Provides unified interface for vector loads, stores, and arithmetic. include/structor/optimized_algorithms.hpp (new): - Implemented O(n log n) sweep-line algorithm for interval overlap detection, replacing O(n^2) pairwise checks. - Added SIMD-accelerated binary GCD and batch coverage analysis. include/structor/optimized_containers.hpp (new): - Added Arena and ObjectPool allocators to reduce heap fragmentation during synthesis. - Implemented FlatUnionFind and FlatHashSet for cache-locality improvements. - Added SmallVector for inline storage optimization. src/plugin.cpp: - Hooked Hex-Rays `hxe_func_printed` event to trigger automatic type fixing. - Added IDC bindings for manual type analysis and fixing (e.g., `structor_fix_function_types`). src/z3/layout_constraints.cpp & src/z3/field_candidates.cpp: - Integrated optimized algorithms into the constraint generation pipeline. - Replaced naive overlap checks with sweep-line implementation. - Added prefetching hints for candidate traversal. src/z3/type_lattice.cpp: - Added memoization (caching) for Least Upper Bound (LUB) and Greatest Lower Bound (GLB) operations. - Improved type hashing using FNV-1a with golden ratio mixing to reduce collisions. bench/ (new): - Added comprehensive benchmark suite comparing naive vs. optimized implementations. 
Impact: - User Experience: Decompiled code now automatically receives better type definitions without manual intervention. - Performance: Constraint generation for large structures is significantly faster due to algorithmic complexity reduction (O(n^2) -> O(n log n)) and hardware acceleration. - Reliability: Type inference is conservative by default but can be tuned via config.
1 parent a7876d0 commit 4718401

19 files changed

+5059
-125
lines changed

bench/bench_algorithms.cpp

Lines changed: 586 additions & 0 deletions
Large diffs are not rendered by default.

bench/test_alloc.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Test aligned allocation
2+
#include <iostream>
3+
#include <cstdlib>
4+
#include <cstdint>
5+
6+
/// Allocate `size` bytes aligned to `alignment`, logging the request to stderr.
///
/// The returned block must be released with std::free().
///
/// @param alignment Requested alignment in bytes; must be a non-zero power of two.
/// @param size      Number of bytes requested.
/// @return Pointer to the aligned block, or nullptr if the alignment is invalid
///         or the underlying allocator fails.
void* my_aligned_alloc(size_t alignment, size_t size) {
    std::cerr << "aligned_alloc(alignment=" << alignment << ", size=" << size << ")\n";

    // Both back-ends only accept power-of-two alignments; reject anything else
    // up front instead of relying on platform-specific failure behaviour.
    if (alignment == 0 || (alignment & (alignment - 1)) != 0) {
        return nullptr;
    }

#if defined(__APPLE__)
    // posix_memalign additionally requires the alignment to be a multiple of
    // sizeof(void*); small alignments such as 4 would otherwise yield EINVAL.
    if (alignment < sizeof(void*)) {
        alignment = sizeof(void*);
    }

    void* ptr = nullptr;
    int result = posix_memalign(&ptr, alignment, size);
    std::cerr << " posix_memalign returned " << result << ", ptr=" << ptr << "\n";
    if (result != 0) {
        return nullptr;
    }
    return ptr;
#else
    // aligned_alloc formally requires size to be an integral multiple of the
    // alignment, so round the request up (guarding against wrap-around).
    const size_t rounded = (size + alignment - 1) / alignment * alignment;
    if (rounded < size) {
        return nullptr;
    }
    return std::aligned_alloc(alignment, rounded);
#endif
}
21+
22+
int main() {
23+
// Test with alignof(int32_t) = 4
24+
void* p1 = my_aligned_alloc(4, 64);
25+
if (p1) std::free(p1);
26+
27+
// Test with alignment = 16
28+
void* p2 = my_aligned_alloc(16, 64);
29+
if (p2) std::free(p2);
30+
31+
// Test with alignment = 4 but small size
32+
void* p3 = my_aligned_alloc(4, 32);
33+
if (p3) std::free(p3);
34+
35+
return 0;
36+
}

bench/test_overlap.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Simple test for overlap detection
2+
#define STRUCTOR_TESTING
3+
#include "structor/optimized_algorithms.hpp"
4+
#include <iostream>
5+
#include <random>
6+
7+
using namespace structor::algorithms;
8+
9+
int main() {
10+
std::cerr << "Creating intervals...\n";
11+
12+
std::mt19937 rng(42);
13+
std::uniform_int_distribution<int64_t> start_dist(0, 1000);
14+
std::uniform_int_distribution<int64_t> len_dist(250, 500);
15+
16+
std::vector<Interval> intervals;
17+
for (int i = 0; i < 256; ++i) {
18+
int64_t start = start_dist(rng);
19+
int64_t len = len_dist(rng);
20+
intervals.emplace_back(start, start + len, i);
21+
}
22+
23+
std::cerr << "Created " << intervals.size() << " intervals\n";
24+
std::cerr << "Finding overlaps...\n";
25+
26+
try {
27+
auto result = find_overlapping_pairs(intervals);
28+
std::cerr << "Found " << result.size() << " overlapping pairs\n";
29+
} catch (const std::exception& e) {
30+
std::cerr << "Exception: " << e.what() << "\n";
31+
return 1;
32+
}
33+
34+
std::cerr << "Done\n";
35+
return 0;
36+
}

bench/test_union_find.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#define STRUCTOR_TESTING
2+
#include "structor/optimized_containers.hpp"
3+
#include <iostream>
4+
#include <vector>
5+
6+
using namespace structor;
7+
8+
int main() {
9+
std::cerr << "Creating FlatUnionFind...\n";
10+
FlatUnionFind uf(256);
11+
12+
std::cerr << "Making sets...\n";
13+
for (int i = 0; i < 256; ++i) {
14+
uf.make_set(i);
15+
}
16+
17+
std::cerr << "Performing unions...\n";
18+
for (int i = 0; i < 100; ++i) {
19+
uf.unite(i, i + 100);
20+
if (i % 20 == 0) {
21+
std::cerr << " united " << i << " and " << i+100 << "\n";
22+
}
23+
}
24+
25+
std::cerr << "Done\n";
26+
return 0;
27+
}

include/structor/api.hpp

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "pseudocode_rewriter.hpp"
1010
#include "structure_persistence.hpp"
1111
#include "ui_integration.hpp"
12+
#include "type_fixer.hpp"
1213

1314
namespace structor {
1415

@@ -60,6 +61,27 @@ class StructorAPI {
6061
const tinfo_t& type,
6162
PropagationDirection direction = PropagationDirection::Both);
6263

64+
/// Fix types for all variables in a function
65+
/// Analyzes access patterns and applies inferred types when significantly different
66+
[[nodiscard]] TypeFixResult fix_function_types(
67+
ea_t func_ea,
68+
const TypeFixerConfig* config = nullptr);
69+
70+
/// Fix types for a specific variable in a function
71+
[[nodiscard]] VariableTypeFix fix_variable_type(
72+
ea_t func_ea,
73+
int var_idx,
74+
const TypeFixerConfig* config = nullptr);
75+
76+
/// Fix types for a variable by name
77+
[[nodiscard]] VariableTypeFix fix_variable_type(
78+
ea_t func_ea,
79+
const char* var_name,
80+
const TypeFixerConfig* config = nullptr);
81+
82+
/// Analyze types without fixing (dry run)
83+
[[nodiscard]] TypeFixResult analyze_function_types(ea_t func_ea);
84+
6385
/// Get current configuration
6486
[[nodiscard]] const SynthOptions& get_options() const {
6587
return Config::instance().options();
@@ -181,6 +203,101 @@ inline PropagationResult StructorAPI::propagate_type(
181203
return propagator.propagate(func_ea, var_idx, type, direction);
182204
}
183205

206+
/// Run the type fixer over the function at `func_ea`.
///
/// @param func_ea Address of the function to process.
/// @param config  Optional fixer settings; a default-constructed
///                TypeFixerConfig is used when this is null.
/// @return Whatever TypeFixer::fix_function_types reports for the function.
inline TypeFixResult StructorAPI::fix_function_types(
    ea_t func_ea,
    const TypeFixerConfig* config)
{
    // Work on a local copy so the caller's configuration is never touched.
    TypeFixerConfig effective_cfg = (config != nullptr) ? *config : TypeFixerConfig();

    TypeFixer type_fixer(effective_cfg);
    return type_fixer.fix_function_types(func_ea);
}
214+
215+
/// Analyze a single local variable (by index) and, when warranted, apply the
/// inferred type to it.
///
/// @param func_ea Address of the function containing the variable.
/// @param var_idx Index of the variable in the function's lvar list.
/// @param config  Optional fixer settings; defaults are used when null.
/// @return A VariableTypeFix describing the comparison and, on success, the
///         propagation outcome. When nothing is applied, `skip_reason` says why.
inline VariableTypeFix StructorAPI::fix_variable_type(
    ea_t func_ea,
    int var_idx,
    const TypeFixerConfig* config)
{
    VariableTypeFix fix;
    fix.var_idx = var_idx;

    cfuncptr_t cfunc = utils::get_cfunc(func_ea);
    if (!cfunc) {
        fix.skip_reason = "Failed to decompile function";
        return fix;
    }

    lvars_t* lvars = cfunc->get_lvars();
    const bool index_ok = lvars != nullptr
                       && var_idx >= 0
                       && static_cast<size_t>(var_idx) < lvars->size();
    if (!index_ok) {
        fix.skip_reason = "Invalid variable index";
        return fix;
    }

    auto& target = lvars->at(var_idx);
    fix.var_name = target.name;
    fix.is_argument = target.is_arg_var();

    TypeFixerConfig cfg = (config != nullptr) ? *config : TypeFixerConfig();
    TypeFixer fixer(cfg);

    // Compare the variable's current type with the inferred one.
    fix.comparison = fixer.analyze_variable(cfunc, var_idx);

    // Insignificant differences are never applied, dry run or not.
    if (!fix.comparison.is_significant()) {
        fix.skip_reason.sprnt("Not significant (%s)",
                              type_difference_str(fix.comparison.difference));
        return fix;
    }

    if (cfg.dry_run) {
        fix.skip_reason = "Dry run mode";
        return fix;
    }

    // Propagation details are only collected when the config asks for them.
    PropagationResult prop;
    PropagationResult* prop_out = cfg.propagate_fixes ? &prop : nullptr;
    if (!fixer.apply_fix(cfunc, var_idx, fix.comparison.inferred_type, prop_out)) {
        fix.skip_reason = "Failed to apply type";
        return fix;
    }

    fix.applied = true;
    fix.propagation = std::move(prop);
    return fix;
}
263+
264+
/// Analyze a single local variable (looked up by name) and, when warranted,
/// apply the inferred type to it.
///
/// The name is resolved to an index in the function's lvar list and the work
/// is delegated to the index-based overload.
///
/// @param func_ea  Address of the function containing the variable.
/// @param var_name Name of the variable; a null pointer is rejected.
/// @param config   Optional fixer settings, forwarded to the index overload.
/// @return The result of the index-based overload, or a VariableTypeFix whose
///         `skip_reason` explains why the variable could not be resolved.
inline VariableTypeFix StructorAPI::fix_variable_type(
    ea_t func_ea,
    const char* var_name,
    const TypeFixerConfig* config)
{
    VariableTypeFix result;

    // A null name must never reach the "%s" format below or the lookup helper.
    if (var_name == nullptr) {
        result.skip_reason = "Variable name is null";
        return result;
    }

    cfuncptr_t cfunc = utils::get_cfunc(func_ea);
    if (!cfunc) {
        result.skip_reason = "Failed to decompile function";
        return result;
    }

    lvar_t* var = utils::find_lvar_by_name(cfunc, var_name);
    if (!var) {
        result.skip_reason.sprnt("Variable '%s' not found", var_name);
        return result;
    }

    // Mirror the index overload: get_lvars() is checked before it is used.
    lvars_t* lvars = cfunc->get_lvars();
    if (lvars != nullptr) {
        for (size_t i = 0; i < lvars->size(); ++i) {
            // Identify the variable by address to recover its index.
            if (&(*lvars)[i] == var) {
                return fix_variable_type(func_ea, static_cast<int>(i), config);
            }
        }
    }

    result.skip_reason = "Variable index lookup failed";
    return result;
}
293+
294+
/// Analyze the variable types of the function at `func_ea` without applying
/// any changes: runs the type fixer with default settings and dry-run enabled.
///
/// @param func_ea Address of the function to analyze.
/// @return Whatever TypeFixer::fix_function_types reports in dry-run mode.
inline TypeFixResult StructorAPI::analyze_function_types(ea_t func_ea) {
    TypeFixerConfig analysis_cfg;
    analysis_cfg.dry_run = true;  // report only; nothing gets applied

    TypeFixer analyzer(analysis_cfg);
    return analyzer.fix_function_types(func_ea);
}
300+
184301
inline SynthResult StructorAPI::do_synthesis(ea_t func_ea, int var_idx, const SynthOptions& opts) {
185302
SynthResult result;
186303

include/structor/config.hpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ struct SynthOptions {
141141
bool debug_mode; // Enable debug logging (adopted from Suture)
142142
AccessPredicate access_filter; // Filter predicate for accesses (adopted from Suture)
143143

144+
// Automatic type fixing options
145+
bool auto_fix_types; // Automatically fix types when decompiling
146+
bool auto_fix_verbose; // Print messages about auto-fixed types
147+
144148
// Z3-specific options
145149
Z3Options z3; // Z3 synthesis configuration
146150

@@ -161,6 +165,8 @@ struct SynthOptions {
161165
, emit_substructs(true)
162166
, debug_mode(false)
163167
, access_filter(predicates::accept_all)
168+
, auto_fix_types(true)
169+
, auto_fix_verbose(false)
164170
, z3() {}
165171
};
166172

@@ -258,7 +264,8 @@ inline bool Config::load() {
258264

259265
std::ifstream file(path);
260266
if (!file.is_open()) {
261-
// No config file, use defaults
267+
// No config file, create one with defaults
268+
save();
262269
return true;
263270
}
264271

@@ -316,6 +323,12 @@ inline bool Config::load() {
316323
} else if (key == "debug_mode") {
317324
options_.debug_mode = parse_bool(value);
318325
}
326+
// Auto type fixing options
327+
else if (key == "auto_fix_types") {
328+
options_.auto_fix_types = parse_bool(value);
329+
} else if (key == "auto_fix_verbose") {
330+
options_.auto_fix_verbose = parse_bool(value);
331+
}
319332
// Z3 options
320333
else if (key == "z3_mode") {
321334
if (value == "disabled") options_.z3.mode = Z3SynthesisMode::Disabled;
@@ -378,6 +391,11 @@ inline bool Config::save() {
378391
file << "debug_mode=" << (options_.debug_mode ? "true" : "false") << "\n";
379392
file << "\n";
380393

394+
file << "[TypeFix]\n";
395+
file << "auto_fix_types=" << (options_.auto_fix_types ? "true" : "false") << "\n";
396+
file << "auto_fix_verbose=" << (options_.auto_fix_verbose ? "true" : "false") << "\n";
397+
file << "\n";
398+
381399
file << "[Synthesis]\n";
382400
file << "min_accesses=" << options_.min_accesses << "\n";
383401
file << "alignment=" << options_.alignment << "\n";

0 commit comments

Comments
 (0)