Skip to content

Commit e96a7a0

Browse files
committed
api: indirect branch inlining support
1 parent 3bfeb0e commit e96a7a0

File tree

7 files changed

+158
-95
lines changed

7 files changed

+158
-95
lines changed

architecture.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,15 @@ std::set<ArchAndAddr>& BasicBlockAnalysisContext::GetHaltedDisassemblyAddresses(
298298
}
299299

300300

301+
// Returns a mutable reference to this context's map of inlined unresolved indirect branches.
// The backing std::optional is filled with an empty map on first access, so the returned
// reference is always valid. DefaultAnalyzeBasicBlocks stores entries keyed by the location
// of the call being analyzed, with the callee's single unresolved indirect branch as the value.
std::map<ArchAndAddr, ArchAndAddr>& BasicBlockAnalysisContext::GetInlinedUnresolvedIndirectBranches()
302+
{
303+
if (!m_inlinedUnresolvedIndirectBranches)
304+
m_inlinedUnresolvedIndirectBranches.emplace();
305+
306+
return *m_inlinedUnresolvedIndirectBranches;
307+
}
308+
309+
301310
void BasicBlockAnalysisContext::AddTempOutgoingReference(Function* targetFunc)
302311
{
303312
BNAnalyzeBasicBlocksContextAddTempReference(m_context, targetFunc->m_object);
@@ -386,6 +395,29 @@ void BasicBlockAnalysisContext::Finalize()
386395
delete[] haltedAddresses;
387396
}
388397

398+
if (m_inlinedUnresolvedIndirectBranches)
399+
{
400+
auto& inlinedUnresolvedIndirectBranches = *m_inlinedUnresolvedIndirectBranches;
401+
402+
BNArchitectureAndAddress* locations = new BNArchitectureAndAddress[inlinedUnresolvedIndirectBranches.size() * 2];
403+
404+
size_t i = 0;
405+
for (auto& pair : inlinedUnresolvedIndirectBranches)
406+
{
407+
locations[i].arch = pair.first.arch->GetObject();
408+
locations[i].address = pair.first.address;
409+
410+
locations[i + 1].arch = pair.second.arch->GetObject();
411+
locations[i + 1].address = pair.second.address;
412+
413+
i += 2;
414+
}
415+
416+
BNAnalyzeBasicBlocksContextSetInlinedUnresolvedIndirectBranches(m_context, locations, inlinedUnresolvedIndirectBranches.size() * 2);
417+
418+
delete[] locations;
419+
}
420+
389421
if (m_contextualReturns)
390422
{
391423
auto& contextualReturns = *m_contextualReturns;

binaryninjaapi.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8137,6 +8137,7 @@ namespace BinaryNinja {
81378137
std::optional<std::map<uint64_t, std::set<ArchAndAddr>>> m_directCodeReferences;
81388138
std::optional<std::set<ArchAndAddr>> m_directNoReturnCalls;
81398139
std::optional<std::set<ArchAndAddr>> m_haltedDisassemblyAddresses;
8140+
std::optional<std::map<ArchAndAddr, ArchAndAddr>> m_inlinedUnresolvedIndirectBranches;
81408141

81418142
public:
81428143
BNBasicBlockAnalysisContext* m_context;
@@ -8160,6 +8161,7 @@ namespace BinaryNinja {
81608161
std::map<uint64_t, std::set<ArchAndAddr>>& GetDirectCodeReferences();
81618162
std::set<ArchAndAddr>& GetDirectNoReturnCalls();
81628163
std::set<ArchAndAddr>& GetHaltedDisassemblyAddresses();
8164+
std::map<ArchAndAddr, ArchAndAddr>& GetInlinedUnresolvedIndirectBranches();
81638165

81648166
void AddTempOutgoingReference(Function* targetFunc);
81658167

@@ -11495,7 +11497,7 @@ namespace BinaryNinja {
1149511497

1149611498
Ref<Function> GetCalleeForAnalysis(Ref<Platform> platform, uint64_t addr, bool exact);
1149711499

11498-
std::vector<uint64_t> GetUnresolvedIndirectBranches();
11500+
std::vector<ArchAndAddr> GetUnresolvedIndirectBranches();
1149911501
bool HasUnresolvedIndirectBranches();
1150011502

1150111503
void SetAutoCallTypeAdjustment(Architecture* arch, uint64_t addr, const Confidence<Ref<Type>>& adjust);

binaryninjacore.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,14 @@
3737
// Current ABI version for linking to the core. This is incremented any time
3838
// there are changes to the API that affect linking, including new functions,
3939
// new types, or modifications to existing functions or types.
40-
#define BN_CURRENT_CORE_ABI_VERSION 116
40+
#define BN_CURRENT_CORE_ABI_VERSION 117
4141

4242
// Minimum ABI version that is supported for loading of plugins. Plugins that
4343
// are linked to an ABI version less than this will not be able to load and
4444
// will require rebuilding. The minimum version is increased when there are
4545
// incompatible changes that break binary compatibility, such as changes to
4646
// existing types or functions.
47-
#define BN_MINIMUM_CORE_ABI_VERSION 116
47+
#define BN_MINIMUM_CORE_ABI_VERSION 117
4848

4949
#ifdef __GNUC__
5050
#ifdef BINARYNINJACORE_LIBRARY
@@ -1904,6 +1904,9 @@ extern "C"
19041904

19051905
size_t haltedDisassemblyAddressesCount;
19061906
BNArchitectureAndAddress* haltedDisassemblyAddresses;
1907+
1908+
size_t inlinedUnresolvedIndirectBranchCount;
1909+
BNArchitectureAndAddress* inlinedUnresolvedIndirectBranches;
19071910
} BNBasicBlockAnalysisContext;
19081911

19091912
typedef struct BNCustomArchitecture
@@ -5141,7 +5144,7 @@ extern "C"
51415144
BINARYNINJACOREAPI BNFunction* BNGetCalleeForAnalysis(BNFunction* func, BNPlatform* platform,
51425145
uint64_t addr, bool exact);
51435146

5144-
BINARYNINJACOREAPI uint64_t* BNGetUnresolvedIndirectBranches(BNFunction* func, size_t* count);
5147+
BINARYNINJACOREAPI BNArchitectureAndAddress* BNGetUnresolvedIndirectBranches(BNFunction* func, size_t* count);
51455148
BINARYNINJACOREAPI bool BNHasUnresolvedIndirectBranches(BNFunction* func);
51465149

51475150
BINARYNINJACOREAPI void BNFunctionToggleRegion(BNFunction* func, uint64_t hash);
@@ -5214,6 +5217,7 @@ extern "C"
52145217
BINARYNINJACOREAPI void BNAnalyzeBasicBlocksContextSetDirectNoReturnCalls(BNBasicBlockAnalysisContext* abb, BNArchitectureAndAddress* sources, size_t count);
52155218
BINARYNINJACOREAPI void BNAnalyzeBasicBlocksContextSetContextualFunctionReturns(BNBasicBlockAnalysisContext* abb, BNArchitectureAndAddress* sources, bool* values, size_t count);
52165219
BINARYNINJACOREAPI void BNAnalyzeBasicBlocksContextSetHaltedDisassemblyAddresses(BNBasicBlockAnalysisContext* abb, BNArchitectureAndAddress* sources, size_t count);
5220+
BINARYNINJACOREAPI void BNAnalyzeBasicBlocksContextSetInlinedUnresolvedIndirectBranches(BNBasicBlockAnalysisContext* abb, BNArchitectureAndAddress* locations, size_t count);
52175221

52185222
BINARYNINJACOREAPI BNAnalysisParameters BNGetParametersForAnalysis(BNBinaryView* view);
52195223
BINARYNINJACOREAPI void BNSetParametersForAnalysis(BNBinaryView* view, BNAnalysisParameters params);

defaultabb.cpp

Lines changed: 96 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ void Architecture::DefaultAnalyzeBasicBlocks(Function* function, BasicBlockAnaly
7575
auto& directRefs = context.GetDirectCodeReferences();
7676
auto& directNoReturnCalls = context.GetDirectNoReturnCalls();
7777
auto& haltedDisassemblyAddresses = context.GetHaltedDisassemblyAddresses();
78+
auto& inlinedUnresolvedIndirectBranches = context.GetInlinedUnresolvedIndirectBranches();
7879

7980
bool hasInvalidInstructions = false;
8081
set<ArchAndAddr> guidedSourceBlockTargets;
@@ -325,6 +326,81 @@ void Architecture::DefaultAnalyzeBasicBlocks(Function* function, BasicBlockAnaly
325326
{
326327
bool fastPath;
327328

329+
auto handleAsFallback = [&]() {
330+
// Undefined type or target, check for targets from analysis and stop disassembling this block
331+
endsBlock = true;
332+
333+
if (info.branchType[i] == IndirectBranch)
334+
{
335+
// Indirect calls need not end the block early.
336+
Ref<LowLevelILFunction> ilFunc = new LowLevelILFunction(location.arch, nullptr);
337+
location.arch->GetInstructionLowLevelIL(opcode, location.address, maxLen, *ilFunc);
338+
for (size_t idx = 0; idx < ilFunc->GetInstructionCount(); idx++)
339+
{
340+
if ((*ilFunc)[idx].operation == LLIL_CALL)
341+
{
342+
endsBlock = false;
343+
break;
344+
}
345+
}
346+
}
347+
348+
indirectBranchIter = indirectBranches.find(location);
349+
endIter = indirectBranches.end();
350+
if (indirectBranchIter != endIter)
351+
{
352+
for (auto& branch : indirectBranchIter->second)
353+
{
354+
directRefs[branch.address].emplace(location);
355+
Ref<Platform> targetPlatform = funcPlatform;
356+
if (branch.arch != function->GetArchitecture())
357+
targetPlatform = funcPlatform->GetRelatedPlatform(branch.arch);
358+
359+
// Normal analysis should not inline indirect targets that are function starts
360+
if (translateTailCalls && data->GetAnalysisFunction(targetPlatform, branch.address))
361+
continue;
362+
363+
if (isGuidedSourceBlock)
364+
guidedSourceBlockTargets.insert(branch);
365+
366+
block->AddPendingOutgoingEdge(IndirectBranch, branch.address, branch.arch);
367+
if (seenBlocks.count(branch) == 0)
368+
{
369+
blocksToProcess.push(branch);
370+
seenBlocks.insert(branch);
371+
}
372+
}
373+
}
374+
else if (info.branchType[i] == ExceptionBranch)
375+
{
376+
block->SetCanExit(false);
377+
}
378+
else if (info.branchType[i] == FunctionReturn && function->CanReturn().GetValue())
379+
{
380+
// Support for contextual function returns. This is mainly used for ARM/Thumb with 'blx lr'. It's most common for this to be treated
381+
// as a function return, however it can also be a function call. For now this transform is described as follows:
382+
// 1) Architecture lifts a call instruction as LLIL_CALL with a branch type of FunctionReturn
383+
// 2) By default, contextualFunctionReturns is used to translate this to a LLIL_RET (conservative)
384+
// 3) Downstream analysis uses dataflow to validate the return target
385+
// 4) If the target is not the ReturnAddressValue, then we avoid the translation to a return and leave the instruction as a call
386+
if (auto it = contextualFunctionReturns.find(location); it != contextualFunctionReturns.end())
387+
endsBlock = it->second;
388+
else
389+
{
390+
Ref<LowLevelILFunction> ilFunc = new LowLevelILFunction(location.arch, nullptr);
391+
location.arch->GetInstructionLowLevelIL(opcode, location.address, maxLen, *ilFunc);
392+
if (ilFunc->GetInstructionCount() && ((*ilFunc)[0].operation == LLIL_CALL))
393+
contextualFunctionReturns[location] = true;
394+
}
395+
}
396+
else
397+
{
398+
// If analysis did not find any valid branch targets, don't assume anything about global
399+
// function state, such as __noreturn analysis, since we can't see the entire function.
400+
block->SetUndeterminedOutgoingEdges(true);
401+
}
402+
};
403+
328404
switch (info.branchType[i])
329405
{
330406
case UnconditionalBranch:
@@ -375,7 +451,7 @@ void Architecture::DefaultAnalyzeBasicBlocks(Function* function, BasicBlockAnaly
375451
calledFunctions.insert(otherFunc);
376452
if (info.branchType[i] == UnconditionalBranch)
377453
{
378-
if (!otherFunc->CanReturn())
454+
if (!otherFunc->CanReturn() && !otherFunc->IsInlinedDuringAnalysis().GetValue())
379455
{
380456
directNoReturnCalls.insert(location);
381457
endsBlock = true;
@@ -465,98 +541,39 @@ void Architecture::DefaultAnalyzeBasicBlocks(Function* function, BasicBlockAnaly
465541
break;
466542
}
467543

468-
directRefs[target.address].emplace(location);
469-
if (!func->CanReturn())
470-
{
471-
directNoReturnCalls.insert(location);
472-
endsBlock = true;
473-
block->SetCanExit(false);
474-
}
475544

476545
// Add function as an early reference in case it gets updated before this
477546
// function finishes analysis.
478547
context.AddTempOutgoingReference(func);
479548

480549
calledFunctions.emplace(func);
481-
}
482-
break;
483550

484-
case SystemCall:
485-
break;
486-
487-
default:
488-
// Undefined type or target, check for targets from analysis and stop disassembling this block
489-
endsBlock = true;
490-
491-
if (info.branchType[i] == IndirectBranch)
492-
{
493-
// Indirect calls need not end the block early.
494-
Ref<LowLevelILFunction> ilFunc = new LowLevelILFunction(location.arch, nullptr);
495-
location.arch->GetInstructionLowLevelIL(opcode, location.address, maxLen, *ilFunc);
496-
for (size_t idx = 0; idx < ilFunc->GetInstructionCount(); idx++)
551+
directRefs[target.address].emplace(location);
552+
if (!func->CanReturn())
497553
{
498-
if ((*ilFunc)[idx].operation == LLIL_CALL)
554+
if (func->IsInlinedDuringAnalysis().GetValue() && func->HasUnresolvedIndirectBranches())
499555
{
500-
endsBlock = false;
501-
break;
556+
auto unresolved = func->GetUnresolvedIndirectBranches();
557+
if (unresolved.size() == 1)
558+
{
559+
inlinedUnresolvedIndirectBranches[location] = *unresolved.begin();
560+
handleAsFallback();
561+
break;
562+
}
502563
}
564+
565+
directNoReturnCalls.insert(location);
566+
endsBlock = true;
567+
block->SetCanExit(false);
503568
}
504569
}
570+
break;
505571

506-
indirectBranchIter = indirectBranches.find(location);
507-
endIter = indirectBranches.end();
508-
if (indirectBranchIter != endIter)
509-
{
510-
for (auto& branch : indirectBranchIter->second)
511-
{
512-
directRefs[branch.address].emplace(location);
513-
Ref<Platform> targetPlatform = funcPlatform;
514-
if (branch.arch != function->GetArchitecture())
515-
targetPlatform = funcPlatform->GetRelatedPlatform(branch.arch);
516-
517-
// Normal analysis should not inline indirect targets that are function starts
518-
if (translateTailCalls && data->GetAnalysisFunction(targetPlatform, branch.address))
519-
continue;
520-
521-
if (isGuidedSourceBlock)
522-
guidedSourceBlockTargets.insert(branch);
572+
case SystemCall:
573+
break;
523574

524-
block->AddPendingOutgoingEdge(IndirectBranch, branch.address, branch.arch);
525-
if (seenBlocks.count(branch) == 0)
526-
{
527-
blocksToProcess.push(branch);
528-
seenBlocks.insert(branch);
529-
}
530-
}
531-
}
532-
else if (info.branchType[i] == ExceptionBranch)
533-
{
534-
block->SetCanExit(false);
535-
}
536-
else if (info.branchType[i] == FunctionReturn)
537-
{
538-
// Support for contextual function returns. This is mainly used for ARM/Thumb with 'blx lr'. It's most common for this to be treated
539-
// as a function return, however it can also be a function call. For now this transform is described as follows:
540-
// 1) Architecture lifts a call instruction as LLIL_CALL with a branch type of FunctionReturn
541-
// 2) By default, contextualFunctionReturns is used to translate this to a LLIL_RET (conservative)
542-
// 3) Downstream analysis uses dataflow to validate the return target
543-
// 4) If the target is not the ReturnAddressValue, then we avoid the translation to a return and leave the instruction as a call
544-
if (auto it = contextualFunctionReturns.find(location); it != contextualFunctionReturns.end())
545-
endsBlock = it->second;
546-
else
547-
{
548-
Ref<LowLevelILFunction> ilFunc = new LowLevelILFunction(location.arch, nullptr);
549-
location.arch->GetInstructionLowLevelIL(opcode, location.address, maxLen, *ilFunc);
550-
if (ilFunc->GetInstructionCount() && ((*ilFunc)[0].operation == LLIL_CALL))
551-
contextualFunctionReturns[location] = true;
552-
}
553-
}
554-
else
555-
{
556-
// If analysis did not find any valid branch targets, don't assume anything about global
557-
// function state, such as __noreturn analysis, since we can't see the entire function.
558-
block->SetUndeterminedOutgoingEdges(true);
559-
}
575+
default:
576+
handleAsFallback();
560577
break;
561578
}
562579
}

function.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1852,13 +1852,15 @@ Ref<Function> Function::GetCalleeForAnalysis(Ref<Platform> platform, uint64_t ad
18521852
}
18531853

18541854

1855-
vector<uint64_t> Function::GetUnresolvedIndirectBranches()
1855+
vector<ArchAndAddr> Function::GetUnresolvedIndirectBranches()
18561856
{
18571857
size_t count;
1858-
uint64_t* addrs = BNGetUnresolvedIndirectBranches(m_object, &count);
1859-
vector<uint64_t> result;
1860-
result.insert(result.end(), addrs, &addrs[count]);
1861-
BNFreeAddressList(addrs);
1858+
BNArchitectureAndAddress* addresses = BNGetUnresolvedIndirectBranches(m_object, &count);
1859+
vector<ArchAndAddr> result;
1860+
result.reserve(count);
1861+
for (size_t i = 0; i < count; i++)
1862+
result.push_back({new CoreArchitecture(addresses[i].arch), addresses[i].address});
1863+
BNFreeArchitectureAndAddressList(addresses);
18621864
return result;
18631865
}
18641866

python/function.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1277,18 +1277,22 @@ def indirect_branches(self) -> List['variable.IndirectBranchInfo']:
12771277
return result
12781278

12791279
@property
1280-
def unresolved_indirect_branches(self) -> List[int]:
1280+
def unresolved_indirect_branches(self) -> List[Tuple['architecture.Architecture', int]]:
12811281
"""List of unresolved indirect branches (read-only)"""
12821282
count = ctypes.c_ulonglong()
1283-
addrs = core.BNGetUnresolvedIndirectBranches(self.handle, count)
1284-
assert addrs is not None, "core.BNGetUnresolvedIndirectBranches returned None"
1283+
addresses = core.BNGetUnresolvedIndirectBranches(self.handle, count)
12851284
try:
1285+
assert addresses is not None, "core.BNGetUnresolvedIndirectBranches returned None"
12861286
result = []
1287-
for i in range(0, count.value):
1288-
result.append(addrs[i])
1287+
for i in range(count.value):
1288+
result.append((
1289+
architecture.CoreArchitecture._from_cache(addresses[i].arch),
1290+
addresses[i].address
1291+
))
12891292
return result
12901293
finally:
1291-
core.BNFreeAddressList(addrs)
1294+
if addresses is not None:
1295+
core.BNFreeArchitectureAndAddressList(addresses)
12921296

12931297
@property
12941298
def has_unresolved_indirect_branches(self) -> bool:

rust/src/function.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2372,11 +2372,13 @@ impl Function {
23722372
}
23732373

23742374
/// List of addresses of unresolved indirect branches
2375-
pub fn unresolved_indirect_branches(&self) -> Array<UnresolvedIndirectBranches> {
2375+
/*
2376+
pub fn unresolved_indirect_branches(&self) -> Array<Arch> {
23762377
let mut count = 0;
23772378
let result = unsafe { BNGetUnresolvedIndirectBranches(self.handle, &mut count) };
23782379
unsafe { Array::new(result, count, ()) }
23792380
}
2381+
*/
23802382

23812383
/// Returns a string representing the provenance. This portion of the API
23822384
/// is under development. Currently the provenance information is

0 commit comments

Comments
 (0)