Skip to content

Commit 8083ec5

Browse files
committed
feat: implement all options of compile method
1 parent add9589 commit 8083ec5

File tree

5 files changed

+165
-58
lines changed

5 files changed

+165
-58
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

boreal-py/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ crate-type = ["cdylib"]
1717
[dependencies]
1818
boreal = { path = "../boreal", version = "0.9.0" }
1919

20+
libc = "0.2"
2021
pyo3 = { version = "0.23", features = ["extension-module", "macros"] }
2122

2223
[lints]

boreal-py/src/lib.rs

Lines changed: 161 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
11
//! Python bindings for the boreal library.
2+
#![allow(unsafe_code)]
3+
4+
use std::fs::File;
5+
use std::io::Read;
6+
use std::os::fd::{FromRawFd, OwnedFd};
7+
8+
use pyo3::create_exception;
9+
use pyo3::exceptions::{PyException, PyTypeError};
10+
use pyo3::ffi;
211
use pyo3::prelude::*;
12+
use pyo3::types::PyDict;
313

4-
use ::boreal::Compiler;
14+
use ::boreal::compiler;
515

616
// TODO: all clone impls should be efficient...
717
// TODO: should all pyclasses have names and be exposed in the module?
@@ -11,23 +21,164 @@ mod scanner;
1121
mod string_match_instance;
1222
mod string_matches;
1323

24+
create_exception!(boreal, AddRuleError, PyException, "error when adding rules");
25+
1426
// Initializer of the `boreal` Python module: registers the `compile` function and
// exposes the `AddRuleError` exception type under the name "AddRuleError".
#[pymodule]
1527
fn boreal(m: &Bound<'_, PyModule>) -> PyResult<()> {
16-
m.add_function(wrap_pyfunction!(compile, m)?)
28+
m.add_function(wrap_pyfunction!(compile, m)?)?;
29+
// Make the exception type created by `create_exception!` reachable from Python.
m.add("AddRuleError", m.py().get_type::<AddRuleError>())?;
30+
Ok(())
1731
}
1832

33+
// TODO: add strict_escape?
1934
// Compile rules into a `PyScanner` object returned to Python.
//
// (This describes the version added by this commit; the lines marked `-` in this
// diff view are the replaced two-argument implementation.)
//
// Exactly one rule input must be provided, otherwise a `TypeError` is raised:
// - `filepath`: path of a rules file.
// - `source`: rules given as a string.
// - `file`: object accepted by `PyObject_AsFileDescriptor`; its contents are read
//   into a string and compiled.
// - `filepaths`: dict mapping a namespace (str) to a rules file path (str).
// - `sources`: dict mapping a namespace (str) to a rules string (str).
//
// Other parameters:
// - `externals`: optional dict of external symbols, handled by `add_externals`.
// - `includes`: when false, includes are disabled in the compiler parameters.
// - `error_on_warning`: forwarded to `fail_on_warnings`.
//
// A failure to add rules is raised as `AddRuleError`. Warnings are converted to
// strings and stored in the `warnings` field of the returned `PyScanner`.
#[pyfunction]
20-
#[pyo3(signature = (filepath=None, source=None))]
21-
fn compile(filepath: Option<&str>, source: Option<&str>) -> PyResult<scanner::PyScanner> {
22-
let mut compiler = Compiler::new();
23-
match (filepath, source) {
24-
(Some(v), None) => compiler.add_rules_file(v),
25-
(None, Some(v)) => compiler.add_rules_str(v),
26-
_ => todo!(),
35+
#[pyo3(signature = (filepath=None, source=None, file=None, filepaths=None, sources=None, externals=None, includes=true, error_on_warning=false))]
36+
#[allow(clippy::too_many_arguments)]
37+
fn compile(
38+
filepath: Option<&str>,
39+
source: Option<&str>,
40+
file: Option<&Bound<'_, PyAny>>,
41+
filepaths: Option<&Bound<'_, PyDict>>,
42+
sources: Option<&Bound<'_, PyDict>>,
43+
externals: Option<&Bound<'_, PyDict>>,
44+
includes: bool,
45+
error_on_warning: bool,
46+
) -> PyResult<scanner::PyScanner> {
47+
let mut compiler = compiler::Compiler::new();
48+
compiler.set_params(
49+
compiler::CompilerParams::default()
50+
.disable_includes(!includes)
51+
.fail_on_warnings(error_on_warning),
52+
);
53+
// External symbols are defined before any rule is added.
if let Some(externals) = externals {
54+
add_externals(&mut compiler, externals)?;
55+
}
56+
57+
let mut warnings = Vec::new();
58+
59+
// Only the five single-input combinations are accepted; anything else (no input,
// or several inputs at once) reaches the final arm and is rejected.
match (filepath, source, file, filepaths, sources) {
60+
(Some(filepath), None, None, None, None) => {
61+
let res = compiler
62+
.add_rules_file(filepath)
63+
// The file contents are not available here, so an empty input is passed to
// the error formatter.
// TODO: contents
64+
.map_err(|err| convert_compiler_error(&err, filepath, ""))
65+
.map_err(AddRuleError::new_err)?;
66+
warnings = res
67+
.warnings()
68+
.map(|err| convert_compiler_error(err, filepath, ""))
69+
.collect();
70+
}
71+
(None, Some(source), None, None, None) => {
72+
let res = compiler
73+
.add_rules_str(source)
74+
.map_err(|err| convert_compiler_error(&err, "source", source))
75+
.map_err(AddRuleError::new_err)?;
76+
warnings = res
77+
.warnings()
78+
.map(|err| convert_compiler_error(err, "source", source))
79+
.collect();
80+
}
81+
(None, None, Some(file), None, None) => {
82+
// Safety:
83+
// - `as_ptr` is safe because it does not outlive file
84+
// - `PyObject_AsFileDescriptor` is safe to call on any valid PyObject.
85+
// if it fails, -1 is returned.
86+
let fd = unsafe { ffi::PyObject_AsFileDescriptor(file.as_ptr()) };
87+
if fd == -1 {
88+
return Err(PyTypeError::new_err("`file` argument is not a file object"));
89+
}
90+
91+
// Work on a duplicate of the descriptor, so that dropping the Rust `File`
// created below closes only the duplicate.
// Safety: the passed file descriptor is valid and thus can be dupped.
92+
let owned_fd = unsafe { libc::dup(fd) };
93+
if owned_fd == -1 {
94+
return Err(std::io::Error::last_os_error().into());
95+
}
96+
97+
// Safety: the file descriptor is valid and is owned by us.
98+
let owned_fd = unsafe { OwnedFd::from_raw_fd(owned_fd) };
99+
100+
let mut file = File::from(owned_fd);
101+
let mut contents = String::new();
102+
// `_r` is the number of bytes read, which is not needed.
let _r = file.read_to_string(&mut contents)?;
103+
// TODO: this makes the error message not as nice
104+
let res = compiler
105+
.add_rules_str(&contents)
106+
.map_err(|err| convert_compiler_error(&err, "file", &contents))
107+
.map_err(AddRuleError::new_err)?;
108+
warnings = res
109+
.warnings()
110+
.map(|err| convert_compiler_error(err, "file", &contents))
111+
.collect();
112+
}
113+
(None, None, None, Some(filepaths), None) => {
114+
// Each entry is compiled in the namespace given by its key; warnings
// accumulate across entries.
for (key, value) in filepaths {
115+
let namespace: &str = key.extract().map_err(|_| {
116+
PyTypeError::new_err("keys of the `filepaths` argument must be strings")
117+
})?;
118+
let filepath: &str = value.extract().map_err(|_| {
119+
PyTypeError::new_err("values of the `filepaths` argument must be strings")
120+
})?;
121+
let res = compiler
122+
.add_rules_file_in_namespace(filepath, namespace)
123+
// TODO: contents
124+
.map_err(|err| convert_compiler_error(&err, filepath, ""))
125+
.map_err(AddRuleError::new_err)?;
126+
warnings.extend(
127+
res.warnings()
128+
.map(|err| convert_compiler_error(err, filepath, "")),
129+
);
130+
}
131+
}
132+
(None, None, None, None, Some(sources)) => {
133+
for (key, value) in sources {
134+
let namespace: &str = key.extract().map_err(|_| {
135+
PyTypeError::new_err("keys of the `sources` argument must be strings")
136+
})?;
137+
let source: &str = value.extract().map_err(|_| {
138+
PyTypeError::new_err("values of the `sources` argument must be strings")
139+
})?;
140+
let res = compiler
141+
.add_rules_str_in_namespace(source, namespace)
142+
// The namespace doubles as the input name in error descriptions.
.map_err(|err| convert_compiler_error(&err, namespace, source))
143+
.map_err(AddRuleError::new_err)?;
144+
warnings.extend(
145+
res.warnings()
146+
.map(|err| convert_compiler_error(err, namespace, source)),
147+
);
148+
}
149+
}
150+
_ => return Err(PyTypeError::new_err("invalid arguments passed")),
27151
}
28-
.unwrap();
29152

30153
Ok(scanner::PyScanner {
31154
scanner: compiler.into_scanner(),
155+
warnings,
32156
})
33157
}
158+
159+
/// Turn a compiler error (or warning) into the string reported to Python.
///
/// `input_name` and `input` are forwarded unchanged to
/// `AddRuleError::to_short_description`, whose result is returned as is.
fn convert_compiler_error(err: &compiler::AddRuleError, input_name: &str, input: &str) -> String {
    compiler::AddRuleError::to_short_description(err, input_name, input)
}
162+
163+
fn add_externals(compiler: &mut compiler::Compiler, externals: &Bound<'_, PyDict>) -> PyResult<()> {
164+
for (key, value) in externals {
165+
let name: &str = key.extract()?;
166+
167+
if let Ok(v) = value.extract::<bool>() {
168+
let _r = compiler.define_symbol(name, v);
169+
} else if let Ok(v) = value.extract::<i64>() {
170+
let _r = compiler.define_symbol(name, v);
171+
} else if let Ok(v) = value.extract::<f64>() {
172+
let _r = compiler.define_symbol(name, v);
173+
} else if let Ok(v) = value.extract::<&str>() {
174+
let _r = compiler.define_symbol(name, v);
175+
} else if let Ok(v) = value.extract::<&[u8]>() {
176+
let _r = compiler.define_symbol(name, v);
177+
} else {
178+
return Err(PyTypeError::new_err(
179+
"invalid type for the external value, must be a boolean, integer, float or string",
180+
));
181+
}
182+
}
183+
Ok(())
184+
}

boreal-py/src/scanner.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ use crate::rule_match::Match;
99
// Scanner object handed to Python; `compile` builds it from the compiler's
// `into_scanner()` result.
#[pyclass]
1010
pub struct PyScanner {
1111
// Underlying boreal scanner wrapped by this class.
pub scanner: Scanner,
12+
// Warning messages collected by `compile` while adding rules. `#[pyo3(get)]`
// generates a getter only, so Python code can read but not assign this attribute.
#[pyo3(get)]
13+
pub warnings: Vec<String>,
1214
}
1315

1416
#[pymethods]

boreal-py/tests/test_match.py

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -9,54 +9,6 @@
99
]
1010

1111

12-
RULE = """
13-
rule foo: bar baz {
14-
meta:
15-
s = "a\\nz"
16-
b = true
17-
v = -11
18-
strings:
19-
$ = "fgj"
20-
$a = /l.{1,2}n/
21-
condition:
22-
any of them
23-
}
24-
"""
25-
26-
@pytest.mark.parametrize("module,is_yara", MODULES)
27-
def test_overall(module, is_yara):
28-
rule = module.compile(source=RULE)
29-
matches = rule.match(data='abcdefgjiklmnoprstuvwxyzlmmn')
30-
assert len(matches) == 1
31-
assert matches[0].rule == 'foo'
32-
# FIXME: difference between yara and boreal
33-
# assert matches[0].namespace == ''
34-
assert matches[0].tags == ['bar', 'baz']
35-
assert matches[0].meta == {
36-
# XXX yara forces a string type, losing information.
37-
's': 'a\nz' if is_yara else b'a\nz',
38-
'b': True,
39-
'v': -11
40-
}
41-
42-
m = matches[0]
43-
assert len(m.strings) == 2
44-
assert m.strings[0].identifier == '$'
45-
assert len(m.strings[0].instances) == 1
46-
assert m.strings[0].instances[0].offset == 5
47-
assert m.strings[0].instances[0].matched_length == 3
48-
assert m.strings[0].instances[0].matched_data == b'fgj'
49-
50-
assert m.strings[1].identifier == '$a'
51-
assert len(m.strings[1].instances) == 2
52-
assert m.strings[1].instances[0].offset == 10
53-
assert m.strings[1].instances[0].matched_length == 3
54-
assert m.strings[1].instances[0].matched_data == b'lmn'
55-
assert m.strings[1].instances[1].offset == 24
56-
assert m.strings[1].instances[1].matched_length == 4
57-
assert m.strings[1].instances[1].matched_data == b'lmmn'
58-
59-
6012
@pytest.mark.parametrize("module,is_yara", MODULES)
6113
def test_match(module, is_yara):
6214
"""Test all properties related to the Match object"""

0 commit comments

Comments
 (0)