Skip to content

Commit 62b96c1

Browse files
committed
[Rust] WIP: Improve situation around LLIL rewrites
Still working on it, will also need to refactor MLIL and HLIL to have the appropriate expression rewrite stuff
1 parent 4e2ad62 commit 62b96c1

File tree

10 files changed

+363
-166
lines changed

10 files changed

+363
-166
lines changed

rust/examples/workflow.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use binaryninja::binary_view::BinaryViewExt;
22
use binaryninja::low_level_il::expression::{ExpressionHandler, LowLevelILExpressionKind};
3-
use binaryninja::low_level_il::instruction::InstructionHandler;
3+
use binaryninja::low_level_il::instruction::{InstructionHandler, LowLevelInstructionIndex};
44
use binaryninja::low_level_il::VisitorAction;
55
use binaryninja::workflow::{Activity, AnalysisContext, Workflow};
66

@@ -32,8 +32,15 @@ fn example_activity(analysis_context: &AnalysisContext) {
3232
if let LowLevelILExpressionKind::Const(_op) = expr.kind() {
3333
// Replace all consts with 0x1337.
3434
println!("Replacing llil expression @ 0x{:x} : {}", instr, expr.index);
35+
// When rewriting expressions, make sure to set the new expression's address.
36+
let src_operand = expr.kind().source_operand();
37+
println!("Source operand: {:?}", src_operand);
3538
unsafe {
36-
llil.replace_expression(expr.index, llil.const_int(4, 0x1337))
39+
llil.replace_expression(
40+
expr.index,
41+
llil.const_int(4, 0x1337)
42+
.with_address(expr.kind().address()),
43+
)
3744
};
3845
}
3946
VisitorAction::Descend
@@ -68,16 +75,21 @@ pub fn main() {
6875
// traverse all llil expressions and look for the constant 0x1337
6976
for func in &bv.functions() {
7077
if let Ok(llil) = func.low_level_il() {
78+
let last_llil_instr =
79+
llil.instruction_from_index(LowLevelInstructionIndex(llil.instruction_count() - 1));
80+
let last_llil_instr_addr = last_llil_instr.unwrap().address();
7181
for block in &llil.basic_blocks() {
7282
for instr in block.iter() {
7383
instr.visit_tree(&mut |expr| {
7484
if let LowLevelILExpressionKind::Const(value) = expr.kind() {
7585
if value.value() == 0x1337 {
7686
println!(
77-
"Found constant 0x1337 at instruction 0x{:x} in function {}",
87+
"Found constant 0x1337 at instruction 0x{:x} in function 0x{:x}",
7888
instr.address(),
7989
func.start()
8090
);
91+
// When rewriting expressions, make sure to set the address.
92+
assert_ne!(expr.kind().address(), last_llil_instr_addr, "Replaced expression address is not set");
8193
}
8294
}
8395
VisitorAction::Descend

rust/src/architecture.rs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,18 @@ macro_rules! new_id_type {
8080
}
8181
}
8282

83+
impl From<u64> for $name {
84+
fn from(value: u64) -> Self {
85+
Self(value as $inner_type)
86+
}
87+
}
88+
89+
impl From<$name> for u64 {
90+
fn from(value: $name) -> Self {
91+
value.0 as u64
92+
}
93+
}
94+
8395
impl std::fmt::Display for $name {
8496
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
8597
write!(f, "{}", self.0)
@@ -118,21 +130,37 @@ pub trait Architecture: 'static + Sized + AsRef<CoreArchitecture> {
118130
fn max_instr_len(&self) -> usize;
119131
fn opcode_display_len(&self) -> usize;
120132

121-
fn associated_arch_by_addr(&self, addr: u64) -> CoreArchitecture;
133+
fn associated_arch_by_addr(&self, _addr: u64) -> CoreArchitecture {
134+
self.as_ref().handle()
135+
}
122136

137+
/// Returns the [`InstructionInfo`] at the given virtual address with `data`.
138+
///
139+
/// The [`InstructionInfo`] object should always fill in the proper length and branches; if not, the
140+
/// next instruction will likely be incorrect.
123141
fn instruction_info(&self, data: &[u8], addr: u64) -> Option<InstructionInfo>;
142+
143+
/// Returns a list of [`InstructionTextToken`]s representing the instruction at the
144+
/// given virtual address with `data`.
124145
fn instruction_text(
125146
&self,
126147
data: &[u8],
127148
addr: u64,
128149
) -> Option<(usize, Vec<InstructionTextToken>)>;
150+
151+
/// Appends arbitrary low level il instructions representing the semantics of the instruction at
152+
/// the given virtual address with `data`.
129153
fn instruction_llil(
130154
&self,
131155
data: &[u8],
132156
addr: u64,
133157
il: &LowLevelILMutableFunction,
134158
) -> Option<(usize, bool)>;
135159

160+
/// Performs basic block recovery and commits the results to the function analysis.
161+
///
162+
/// NOTE: Only implement this method if function-level analysis is required. Otherwise, do not
163+
/// implement to let default basic block analysis take place.
136164
fn analyze_basic_blocks(
137165
&self,
138166
function: &mut Function,

rust/src/architecture/basic_block.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ impl BasicBlockAnalysisContext {
119119
}
120120
}
121121

122+
/// Adds a contextual function return location and its value to the current function.
122123
pub fn add_contextual_return(&mut self, loc: impl Into<Location>, value: bool) {
123124
let loc = loc.into();
124125
if !self.contextual_returns.contains_key(&loc) {
@@ -128,20 +129,24 @@ impl BasicBlockAnalysisContext {
128129
self.contextual_returns.insert(loc, value);
129130
}
130131

132+
/// Adds a direct code reference to the current function.
131133
pub fn add_direct_code_reference(&mut self, target: u64, source: impl Into<Location>) {
132134
self.direct_code_references
133135
.entry(target)
134136
.or_insert(source.into());
135137
}
136138

139+
/// Adds a direct no-return call location to the current function.
137140
pub fn add_direct_no_return_call(&mut self, loc: impl Into<Location>) {
138141
self.direct_no_return_calls.insert(loc.into());
139142
}
140143

144+
/// Adds an address to the set of halted disassembly addresses.
141145
pub fn add_halted_disassembly_address(&mut self, loc: impl Into<Location>) {
142146
self.halted_disassembly_addresses.insert(loc.into());
143147
}
144148

149+
/// Creates a new [`BasicBlock`] at the specified address for the given [`CoreArchitecture`].
145150
pub fn create_basic_block(
146151
&self,
147152
arch: CoreArchitecture,
@@ -157,12 +162,14 @@ impl BasicBlockAnalysisContext {
157162
unsafe { Some(BasicBlock::ref_from_raw(raw_block, NativeBlock::new())) }
158163
}
159164

165+
/// Adds a [`BasicBlock`] to the current function.
160166
pub fn add_basic_block(&self, block: Ref<BasicBlock<NativeBlock>>) {
161167
unsafe {
162168
BNAnalyzeBasicBlocksContextAddBasicBlockToFunction(self.handle, block.handle);
163169
}
164170
}
165171

172+
/// Adds a temporary outgoing reference to the specified function.
166173
pub fn add_temp_outgoing_reference(&self, target: &Function) {
167174
unsafe {
168175
BNAnalyzeBasicBlocksContextAddTempReference(self.handle, target.handle);
@@ -241,6 +248,7 @@ impl BasicBlockAnalysisContext {
241248
}
242249
}
243250

251+
/// Finalizes the function's basic block analysis.
244252
pub fn finalize(&mut self) {
245253
if !self.direct_code_references.is_empty() {
246254
self.update_direct_code_references();

rust/src/low_level_il.rs

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use binaryninjacore_sys::BNLowLevelILInstruction;
1516
use std::borrow::Cow;
1617
use std::fmt;
17-
18+
use std::fmt::{Display, Formatter};
1819
// TODO : provide some way to forbid emitting register reads for certain registers
1920
// also writing for certain registers (e.g. zero register must prohibit il.set_reg and il.reg
2021
// (replace with nop or const(0) respectively)
@@ -157,6 +158,30 @@ impl From<LowLevelILTempRegister> for LowLevelILRegisterKind<CoreRegister> {
157158
}
158159
}
159160

161+
impl From<CoreRegister> for LowLevelILRegisterKind<CoreRegister> {
162+
fn from(reg: CoreRegister) -> Self {
163+
LowLevelILRegisterKind::Arch(reg)
164+
}
165+
}
166+
167+
impl PartialEq<CoreRegister> for LowLevelILRegisterKind<CoreRegister> {
168+
fn eq(&self, other: &CoreRegister) -> bool {
169+
match *self {
170+
LowLevelILRegisterKind::Arch(ref r) => r == other,
171+
LowLevelILRegisterKind::Temp(_) => false,
172+
}
173+
}
174+
}
175+
176+
impl PartialEq<LowLevelILTempRegister> for LowLevelILRegisterKind<CoreRegister> {
177+
fn eq(&self, other: &LowLevelILTempRegister) -> bool {
178+
match *self {
179+
LowLevelILRegisterKind::Arch(_) => false,
180+
LowLevelILRegisterKind::Temp(ref r) => r == other,
181+
}
182+
}
183+
}
184+
160185
#[derive(Copy, Clone, Debug)]
161186
pub enum LowLevelILSSARegisterKind<R: ArchReg> {
162187
Full {
@@ -209,3 +234,52 @@ pub enum VisitorAction {
209234
Sibling,
210235
Halt,
211236
}
237+
238+
#[repr(transparent)]
239+
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
240+
pub struct LowLevelILOperandIndex(pub u32);
241+
242+
impl LowLevelILOperandIndex {
243+
pub fn next(&self) -> Self {
244+
Self(self.0 + 1)
245+
}
246+
}
247+
248+
impl TryFrom<u32> for LowLevelILOperandIndex {
249+
type Error = ();
250+
251+
fn try_from(value: u32) -> Result<Self, Self::Error> {
252+
match value {
253+
7 => Err(()),
254+
value => Ok(Self(value)),
255+
}
256+
}
257+
}
258+
259+
impl Display for LowLevelILOperandIndex {
260+
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
261+
f.write_fmt(format_args!("{}", self.0))
262+
}
263+
}
264+
265+
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
266+
pub struct LowLevelILSourceLocation {
267+
pub address: u64,
268+
/// The referenced source operand.
269+
pub source_operand: Option<LowLevelILOperandIndex>,
270+
}
271+
272+
impl LowLevelILSourceLocation {
273+
pub fn raw_source_operand(&self) -> u32 {
274+
self.source_operand.unwrap_or(LowLevelILOperandIndex(7)).0
275+
}
276+
}
277+
278+
impl From<&BNLowLevelILInstruction> for LowLevelILSourceLocation {
279+
fn from(value: &BNLowLevelILInstruction) -> Self {
280+
Self {
281+
address: value.address,
282+
source_operand: value.sourceOperand.try_into().ok(),
283+
}
284+
}
285+
}

rust/src/low_level_il/expression.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,18 @@ use std::fmt;
2828
use std::fmt::{Debug, Display, Formatter};
2929
use std::marker::PhantomData;
3030

31+
pub trait ExpressionResultType: 'static + Debug {}
32+
3133
/// Used as a marker for an [`LowLevelILExpression`] that **can** produce a value.
3234
#[derive(Copy, Clone, Debug)]
3335
pub struct ValueExpr;
3436

37+
impl ExpressionResultType for ValueExpr {}
38+
3539
/// Used as a marker for an [`LowLevelILExpression`] that can **not** produce a value.
3640
#[derive(Copy, Clone, Debug)]
3741
pub struct VoidExpr;
3842

39-
pub trait ExpressionResultType: 'static + Debug {}
40-
impl ExpressionResultType for ValueExpr {}
4143
impl ExpressionResultType for VoidExpr {}
4244

4345
#[repr(transparent)]
@@ -574,6 +576,10 @@ where
574576
self.raw_struct().address
575577
}
576578

579+
pub fn source_operand(&self) -> u32 {
580+
self.raw_struct().sourceOperand
581+
}
582+
577583
/// Determines if the expressions represent the same operation
578584
///
579585
/// It does not examine the operands for equality.

rust/src/low_level_il/function.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,17 @@ where
159159
}
160160
}
161161

162+
pub fn instruction_from_expr_index(
163+
&self,
164+
index: LowLevelExpressionIndex,
165+
) -> Option<LowLevelILInstruction<M, F>> {
166+
if index.0 >= self.expression_count() {
167+
None
168+
} else {
169+
Some(LowLevelILInstruction::new_with_expr_index(self, index))
170+
}
171+
}
172+
162173
pub fn expression_count(&self) -> usize {
163174
unsafe {
164175
use binaryninjacore_sys::BNGetLowLevelILExprCount;

0 commit comments

Comments
 (0)