-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathlib.rs
More file actions
488 lines (449 loc) · 18.5 KB
/
lib.rs
File metadata and controls
488 lines (449 loc) · 18.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
//! Python bindings for the boreal library.
#![allow(unsafe_code)]
// The changes recommended by this lint do not integrate well
// with python docstrings.
#![allow(clippy::doc_markdown)]
use std::ffi::CString;
use std::path::Path;
use std::sync::atomic::{AtomicBool, Ordering};
use parking_lot::Mutex;
use pyo3::exceptions::{PyException, PyTypeError};
use pyo3::prelude::*;
use pyo3::types::{PyDict, PyString};
use pyo3::{create_exception, ffi, intern};
use ::boreal::compiler;
mod module;
mod rule;
mod rule_match;
mod rule_string;
mod scanner;
mod string_match_instance;
mod string_matches;
// Base exception type of the `boreal` Python module, deriving from Python's
// built-in `Exception`. Exported as `boreal.Error` by the module initializer.
create_exception!(boreal, Error, PyException, "Generic boreal error");
// Exception raised by `compile` when a rule fails to compile. It derives from
// `Error`, and the module initializer exports it both as `boreal.AddRuleError`
// and as `boreal.SyntaxError`.
create_exception!(
boreal,
AddRuleError,
Error,
"Raised when failing to compile a rule"
);
// Global default for the maximum number of strings per rule. Written by
// `set_config`, and used by `compile` as a fallback when its own
// `max_strings_per_rule` argument is not provided.
static MAX_STRINGS_PER_RULE: Mutex<Option<usize>> = Mutex::new(None);
// Global value stored by `set_config` from its `max_match_data` argument.
// NOTE(review): presumably read by the scanner code to truncate match data;
// the reader is not in this part of the file, confirm.
static MATCH_MAX_LENGTH: Mutex<Option<usize>> = Mutex::new(None);
// Whether the YARA compatibility mode is enabled. Toggled by `set_config`, and
// read by `compile` to pick the default value of `strict_escape`.
static YARA_PYTHON_COMPATIBILITY: AtomicBool = AtomicBool::new(false);
// The constants below are exported as attributes of the Python module under
// the same names.
// NOTE(review): `CALLBACK_CONTINUE` and `CALLBACK_ABORT` look like the values a
// scan callback returns to continue or stop a scan; confirm in the scanner.
const CALLBACK_CONTINUE: u32 = 0;
const CALLBACK_ABORT: u32 = 1;
// Bit flags: `CALLBACK_ALL` is the union of the two flags before it.
// NOTE(review): presumably used to select for which rules a callback is
// invoked; confirm in the scanner.
const CALLBACK_MATCHES: u32 = 0x01;
const CALLBACK_NON_MATCHES: u32 = 0x02;
const CALLBACK_ALL: u32 = CALLBACK_MATCHES | CALLBACK_NON_MATCHES;
// Same value as declared in yara, for compatibility.
const CALLBACK_TOO_MANY_MATCHES: u32 = 6;
/// Python bindings for the YARA scanner boreal.
#[pymodule(gil_used = false)]
fn boreal(m: &Bound<'_, PyModule>) -> PyResult<()> {
let py = m.py();
m.add_function(wrap_pyfunction!(compile, m)?)?;
m.add_function(wrap_pyfunction!(set_config, m)?)?;
#[cfg(feature = "serialize")]
m.add_function(wrap_pyfunction!(load, m)?)?;
m.add("modules", get_available_modules(py))?;
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
m.add("CALLBACK_CONTINUE", CALLBACK_CONTINUE)?;
m.add("CALLBACK_ABORT", CALLBACK_ABORT)?;
m.add("CALLBACK_MATCHES", CALLBACK_MATCHES)?;
m.add("CALLBACK_NON_MATCHES", CALLBACK_NON_MATCHES)?;
m.add("CALLBACK_ALL", CALLBACK_ALL)?;
m.add("CALLBACK_TOO_MANY_MATCHES", CALLBACK_TOO_MANY_MATCHES)?;
m.add("Error", py.get_type::<Error>())?;
m.add("AddRuleError", py.get_type::<AddRuleError>())?;
// Add an alias for SyntaxError: this provides compatibility
// with code using yara.
m.add("SyntaxError", py.get_type::<AddRuleError>())?;
m.add("ScanError", py.get_type::<scanner::ScanError>())?;
m.add("TimeoutError", py.get_type::<scanner::TimeoutError>())?;
m.add("Rule", py.get_type::<rule::Rule>())?;
m.add("Match", py.get_type::<rule_match::Match>())?;
m.add("Scanner", py.get_type::<scanner::Scanner>())?;
m.add("RulesIter", py.get_type::<scanner::RulesIter>())?;
m.add(
"StringMatchInstance",
py.get_type::<string_match_instance::StringMatchInstance>(),
)?;
m.add(
"StringMatches",
py.get_type::<string_matches::StringMatches>(),
)?;
m.add("RuleString", py.get_type::<rule_string::RuleString>())?;
m.add("CompilerProfile", py.get_type::<CompilerProfile>())?;
Ok(())
}
/// Compile YARA rules and generate a Scanner object.
///
/// One of `filepath`, `filepaths`, `source`, `sources`
/// or `file` must be passed.
///
/// Args:
///     filepath: Path to a file containing the rules to compile.
///     filepaths: Dictionary where the value is a path to a file, containing
///         rules to compile, and the key is the name of the namespace that
///         will contain those rules.
///     source: String containing the rules to compile.
///     sources: Dictionary where the value is a string containing the rules
///         to compile, and the key is the name of the namespace that will
///         contain those rules.
///     file: An opened file containing the rules to compile. This can be any
///         object that exposes a `read` method.
///     externals: Dictionary of external symbols to make available during
///         compilation. The key is the name of the external symbol, and the
///         value is the original value to assign to this symbol. This original
///         value can be replaced during scanning by specifying an `externals`
///         dictionary, see the `Scanner::match` method.
///     includes: Allow rules to use the `include` directive. If set to False,
///         any use of the `include` directive will result in a compilation
///         error.
///     error_on_warning: If true, make the compilation fail when a warning
///         is emitted. If false, warnings can be found in the resulting
///         `Scanner` object, see `Scanner::warnings`.
///     include_callback: If specified, this callback is used to resolve
///         includes. The callback will receive three arguments:
///         - The path being included.
///         - The path of the current document. Can be None if the current
///           document was specified as a string, such as when using the
///           `source` or `sources` parameter.
///         - The current namespace.
///         The callback must return a string which is the included document.
///     strict_escape: If true, invalid escape sequences in regexes will
///         generate warnings. The default value depends on the yara
///         compatibility mode: it is False if in compat mode, or True
///         otherwise.
///     profile: Profile to use when compiling the rules. If not specified,
///         `CompilerProfile::Speed` is used.
///     max_strings_per_rule: Maximum number of strings allowed in a single rule.
///         If a rule has more strings than this limit, its compilation will fail.
///     max_condition_depth: Maximum depth in a rule's condition AST.
///         Defensive limit used to prevent stack overflow.
///     parse_expression_recursion_limit: Maximum recursion depth allowed when
///         parsing an expression.
///         Defensive limit used to prevent stack overflow.
///     parse_string_recursion_limit: Maximum recursion depth allowed when
///         parsing a regex or a hex-string.
///         Defensive limit used to prevent stack overflow.
///
/// Returns:
///     a `Scanner` object that holds the compiled rules.
///
/// Raises:
///     TypeError: A provided argument has the wrong type, or none
///         of the input arguments were provided.
///     boreal.AddRuleError: A rule failed to compile.
#[pyfunction]
#[pyo3(signature = (
    filepath=None,
    filepaths=None,
    source=None,
    sources=None,
    file=None,
    externals=None,
    includes=true,
    error_on_warning=false,
    include_callback=None,
    strict_escape=None,
    profile=None,
    max_strings_per_rule=None,
    max_condition_depth=None,
    parse_expression_recursion_limit=None,
    parse_string_recursion_limit=None,
))]
#[allow(clippy::too_many_arguments)]
fn compile(
    filepath: Option<&str>,
    filepaths: Option<&Bound<'_, PyDict>>,
    source: Option<&str>,
    sources: Option<&Bound<'_, PyDict>>,
    file: Option<&Bound<'_, PyAny>>,
    externals: Option<&Bound<'_, PyDict>>,
    includes: bool,
    error_on_warning: bool,
    include_callback: Option<&Bound<'_, PyAny>>,
    strict_escape: Option<bool>,
    profile: Option<&CompilerProfile>,
    max_strings_per_rule: Option<usize>,
    max_condition_depth: Option<u32>,
    parse_expression_recursion_limit: Option<u8>,
    parse_string_recursion_limit: Option<u8>,
) -> PyResult<scanner::Scanner> {
    let mut compiler = build_compiler(profile);

    // By default, enable strict escape, this is the default behavior in boreal.
    // If in yara compat mode, use the yara default behavior and disable it.
    let disable_unknown_escape_warning = match strict_escape {
        Some(v) => !v,
        None => YARA_PYTHON_COMPATIBILITY.load(Ordering::SeqCst),
    };

    let mut params = compiler::CompilerParams::default()
        .disable_includes(!includes)
        .fail_on_warnings(error_on_warning)
        .disable_unknown_escape_warning(disable_unknown_escape_warning);

    // An explicit argument takes precedence over the value set globally
    // through `set_config`.
    let max_strings_per_rule = max_strings_per_rule.or_else(|| *MAX_STRINGS_PER_RULE.lock());
    if let Some(value) = max_strings_per_rule {
        params = params.max_strings_per_rule(value);
    }
    if let Some(limit) = max_condition_depth {
        params = params.max_condition_depth(limit);
    }
    if let Some(limit) = parse_expression_recursion_limit {
        params = params.parse_expression_recursion_limit(limit);
    }
    if let Some(limit) = parse_string_recursion_limit {
        params = params.parse_string_recursion_limit(limit);
    }
    compiler.set_params(params);

    if let Some(externals) = externals {
        add_externals(&mut compiler, externals)?;
    }

    if let Some(cb) = include_callback {
        if !cb.is_callable() {
            return Err(PyTypeError::new_err("include_callback is not callable"));
        }

        // The compiler needs an owned callback, so detach the Python object
        // from the current borrow before moving it into the closure.
        let include_callback = cb.clone().unbind();
        compiler.set_include_callback(move |include_name, current_path, ns| {
            call_py_include_callback(&include_callback, include_name, current_path, ns)
                .map_err(std::io::Error::other)
        });
    }

    // Exactly one of the five rule inputs must be provided: any other
    // combination ends up in the catch-all arm and is rejected.
    let warnings: Vec<String> = match (filepath, source, file, filepaths, sources) {
        (Some(filepath), None, None, None, None) => {
            let status = compiler
                .add_rules_file(filepath)
                .map_err(|err| AddRuleError::new_err(err.to_string()))?;
            status.warnings().map(|warning| warning.to_string()).collect()
        }
        (None, Some(source), None, None, None) => {
            let status = compiler
                .add_rules_str(source)
                .map_err(|err| AddRuleError::new_err(err.to_string()))?;
            status.warnings().map(|warning| warning.to_string()).collect()
        }
        (None, None, Some(file), None, None) => {
            // Read the file into a string
            let read_result = file.call_method0(intern!(file.py(), "read"))?;
            let contents: &str = read_result.extract()?;
            let status = compiler
                .add_rules_str(contents)
                .map_err(|err| AddRuleError::new_err(err.to_string()))?;
            status.warnings().map(|warning| warning.to_string()).collect()
        }
        (None, None, None, Some(filepaths), None) => {
            let mut collected = Vec::new();
            for (key, value) in filepaths {
                let namespace: &str = key.extract().map_err(|_| {
                    PyTypeError::new_err("keys of the `filepaths` argument must be strings")
                })?;
                let filepath: &str = value.extract().map_err(|_| {
                    PyTypeError::new_err("values of the `filepaths` argument must be strings")
                })?;
                let status = compiler
                    .add_rules_file_in_namespace(filepath, namespace)
                    .map_err(|err| AddRuleError::new_err(err.to_string()))?;
                collected.extend(status.warnings().map(|warning| warning.to_string()));
            }
            collected
        }
        (None, None, None, None, Some(sources)) => {
            let mut collected = Vec::new();
            for (key, value) in sources {
                let namespace: &str = key.extract().map_err(|_| {
                    PyTypeError::new_err("keys of the `sources` argument must be strings")
                })?;
                let source: &str = value.extract().map_err(|_| {
                    PyTypeError::new_err("values of the `sources` argument must be strings")
                })?;
                let status = compiler
                    .add_rules_str_in_namespace(source, namespace)
                    .map_err(|err| AddRuleError::new_err(err.to_string()))?;
                collected.extend(status.warnings().map(|warning| warning.to_string()));
            }
            collected
        }
        _ => return Err(PyTypeError::new_err("invalid arguments passed")),
    };

    Ok(scanner::Scanner::new(compiler.finalize(), warnings))
}
/// Profile to use when compiling rules.
///
/// Passed through the `profile` argument of `compile`, which uses `Speed`
/// when no profile is given.
#[pyclass(frozen, eq, eq_int, module = "boreal")]
#[derive(Debug, PartialEq)]
enum CompilerProfile {
/// Prioritize scan speed.
///
/// This profile will strive to get the best possible scan speed by using more memory
/// when possible.
Speed = 0,
/// Prioritize memory usage.
///
/// This profile will strive to reduce memory usage as much as possible, even if it means
/// a slower scan speed overall.
Memory = 1,
}
/// Modify some global parameters
///
/// Args:
///     max_strings_per_rule: Maximum number of strings allowed in a single rule.
///         If a rule has more strings than this limit, its compilation will fail.
///
///         Setting this parameter through this API is not recommended, as it
///         impacts the module globally. Prefer the `max_strings_per_rule`
///         parameter of the `compile` method.
///
///     max_match_data: Maximum length for the match data returned in match
///         results. The match details returned in results will be truncated if
///         they exceed this limit. Default value is 512
///
///         Setting this parameter through this API is not recommended, as it
///         impacts the module globally. Prefer the `max_match_data` parameter
///         of the `match` method on the scanner object.
///
///     stack_size: Unused, this is accepted purely for compatibility with yara.
///
///     yara_compatibility: Enable or disable full YARA compatibility. See the
///         global documentation of this library for more details.
///
/// Raises:
///     TypeError: A provided argument has the wrong type
#[pyfunction]
#[pyo3(signature = (
    max_strings_per_rule=None,
    max_match_data=None,
    stack_size=None,
    yara_compatibility=None,
))]
#[allow(clippy::too_many_arguments)]
fn set_config(
    max_strings_per_rule: Option<usize>,
    max_match_data: Option<usize>,
    stack_size: Option<u64>,
    yara_compatibility: Option<bool>,
) {
    // The stack size has no use in boreal: it is accepted then dropped.
    let _ = stack_size;

    // Only overwrite a global when a value was actually provided, so that
    // omitted arguments leave the previous configuration untouched.
    if max_strings_per_rule.is_some() {
        *MAX_STRINGS_PER_RULE.lock() = max_strings_per_rule;
    }
    if max_match_data.is_some() {
        *MATCH_MAX_LENGTH.lock() = max_match_data;
    }
    if let Some(enabled) = yara_compatibility {
        YARA_PYTHON_COMPATIBILITY.store(enabled, Ordering::SeqCst);
    }
}
/// Load rules from a serialized scanner object.
///
/// A scanner can be serialized into a bytestring and reloaded using
/// this function.
///
/// See [the boreal documentation](https://docs.rs/boreal/latest/boreal/scanner/struct.Scanner.html#method.to_bytes)
/// for more details about this feature and its limitations.
///
/// One of `filepath`, `file` or `data` must be provided.
///
/// Args:
///     filepath: The path to the file containing the serialized scanner.
///     file: An opened file containing the serialized scanner. This can be any
///         object that exposes a `read` method, as long as this read method
///         returns bytes.
///     data: The serialized bytes.
///
/// Returns:
///     a `Scanner` object.
///
/// Raises:
///     TypeError: A provided argument has the wrong type, or none
///         of the input arguments were provided.
///     boreal.Error: The deserialization failed.
#[cfg(feature = "serialize")]
#[pyfunction]
#[pyo3(signature = (
    filepath=None,
    file=None,
    data=None,
))]
fn load(
    filepath: Option<&str>,
    file: Option<&Bound<'_, PyAny>>,
    data: Option<&[u8]>,
) -> PyResult<scanner::Scanner> {
    // Exactly one input must be provided: any other combination is rejected
    // by the catch-all arm.
    let res = match (filepath, file, data) {
        (Some(filepath), None, None) => {
            // An I/O failure is converted into the matching Python exception.
            let contents = std::fs::read(filepath)?;
            scanner::Scanner::load(&contents)
        }
        (None, Some(file), None) => {
            // Any failure of the `read` call is reported as a type error on
            // the `file` parameter.
            let Ok(res) = file.call_method0(intern!(file.py(), "read")) else {
                return Err(PyTypeError::new_err(
                    "the file parameter must implement the read method",
                ));
            };
            let contents: &[u8] = res.extract()?;
            scanner::Scanner::load(contents)
        }
        (None, None, Some(data)) => scanner::Scanner::load(data),
        _ => {
            // `data` is a valid input as well, so it is listed in the message.
            return Err(PyTypeError::new_err(
                "one of filepath, file or data must be passed",
            ))
        }
    };

    res.map_err(|err| Error::new_err(format!("Unable to create a Scanner from bytes: {err:?}")))
}
/// Invoke the Python include callback and return the included document.
///
/// The callback is called with the included name, the path of the current
/// document converted to a string (or `None` when there is none), and the
/// current namespace.
///
/// Returns the string returned by the callback. On failure, returns a
/// description of the error: either the call itself raised, or the returned
/// value could not be converted into a string.
fn call_py_include_callback(
    include_callback: &Py<PyAny>,
    include_name: &str,
    current_path: Option<&Path>,
    ns: &str,
) -> Result<String, String> {
    let current_path = current_path.map(|path| path.display().to_string());

    Python::attach(
        |py| match include_callback.call1(py, (include_name, current_path, ns)) {
            Ok(returned) => returned
                .extract(py)
                .map_err(|err| format!("include callback did not return a string: {err:?}")),
            Err(err) => Err(format!("error when calling include callback: {err:?}")),
        },
    )
}
/// List the names of the modules available in a default compiler, as Python
/// strings.
fn get_available_modules(py: Python<'_>) -> Vec<Bound<'_, PyString>> {
    // A throwaway compiler is built only to query its modules.
    let compiler = build_compiler(None);

    let mut names = Vec::new();
    for module_name in compiler.available_modules() {
        names.push(PyString::new(py, module_name));
    }
    names
}
/// Build a compiler using the given profile, with the console module
/// wired to write its logs on the Python standard output.
///
/// When no profile is given, the speed profile is used.
fn build_compiler(profile: Option<&CompilerProfile>) -> compiler::Compiler {
    // Map the Python-facing profile onto the one declared by boreal.
    let native_profile = match profile {
        None | Some(CompilerProfile::Speed) => ::boreal::compiler::CompilerProfile::Speed,
        Some(CompilerProfile::Memory) => ::boreal::compiler::CompilerProfile::Memory,
    };

    let console = ::boreal::module::Console::with_callback(|log| {
        // XXX: when targeting python 3.12 or above, this could be simplified
        // by using the "%.*s" format, avoiding the CString conversion.
        //
        // A log that cannot be converted into a C string is not printed.
        let Ok(cstr) = CString::new(log) else {
            return;
        };

        // SAFETY: see <https://docs.python.org/3/c-api/unicode.html#c.PyUnicode_FromFormat>
        // for the format. A "%s" expects a c-string pointer, which has just been built.
        unsafe { ffi::PySys_FormatStdout(c"%s\n".as_ptr(), cstr.as_ptr()) }
    });

    compiler::CompilerBuilder::new()
        .profile(native_profile)
        .add_module(console)
        .build()
}
/// Define every entry of the `externals` dictionary as an external symbol
/// in the compiler.
///
/// Keys must be strings. Each value is tried, in order, as a boolean, an
/// integer, a float, a string, then bytes: the first successful conversion
/// is used. The result of the symbol definition is ignored.
///
/// Returns an error if a key is not a string, or a `TypeError` if a value
/// matches none of the accepted types.
fn add_externals(compiler: &mut compiler::Compiler, externals: &Bound<'_, PyDict>) -> PyResult<()> {
    for (key, value) in externals {
        let name: &str = key.extract()?;

        if let Ok(flag) = value.extract::<bool>() {
            let _ = compiler.define_symbol(name, flag);
            continue;
        }
        if let Ok(integer) = value.extract::<i64>() {
            let _ = compiler.define_symbol(name, integer);
            continue;
        }
        if let Ok(float) = value.extract::<f64>() {
            let _ = compiler.define_symbol(name, float);
            continue;
        }
        if let Ok(text) = value.extract::<&str>() {
            let _ = compiler.define_symbol(name, text);
            continue;
        }
        if let Ok(bytes) = value.extract::<&[u8]>() {
            let _ = compiler.define_symbol(name, bytes);
            continue;
        }

        return Err(PyTypeError::new_err(
            "invalid type for the external value, must be a boolean, integer, float or string",
        ));
    }

    Ok(())
}