Skip to content

Commit 5b1acc0

Browse files
authored
partially upgrades byteweight to work with the modern bap (#1431)
This change re-enables Byteweight, which had been effectively disabled ever since we started installing signatures in a different place. Next, it revamps the byteweight plugin and the bap-byteweight-signatures library to work with the modern bap infrastructure that uses Theory.Target instead of the old Bap.Std.Arch. The plugin itself was also rewritten and uses only modern interfaces. It stores roots in the knowledge base (no streams anymore) and uses targets and compiler properties from the knowledge base.
1 parent 5ad24c7 commit 5b1acc0

File tree

9 files changed

+367
-142
lines changed

9 files changed

+367
-142
lines changed

lib/bap_byteweight/bap_byteweight.ml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ module Make2
101101
end
102102

103103
module Bytes = struct
104-
include Make2(struct
104+
module Self = Make2(struct
105105
type t = mem
106106
type key = mem
107107

@@ -114,6 +114,16 @@ module Bytes = struct
114114
| _ -> None
115115
end)(Memory.Trie.Stable.V1.R8)
116116

117+
(* the descriptor of the byte-level signatures, registered under the
   name "bytes"; values are (de)serialized with the [Binable]
   instance of [Self]. *)
let t =
  let load bytes =
    Caml.Bytes.unsafe_to_string bytes |>
    Binable.of_string (module Self) in
  let save data =
    Binable.to_string (module Self) data |>
    Caml.Bytes.unsafe_of_string in
  Bap_byteweight_signatures.Data.declare "bytes" ~load ~save
124+
125+
include Self
126+
117127

118128
let find bw ~length ~threshold mem =
119129
let start = Memory.min_addr mem in

lib/bap_byteweight/bap_byteweight.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ module Bytes : sig
155155
and type corpus = mem
156156
and type token := word
157157

158+
(** the signature data descriptor of this module's signatures; it is
    declared in the signature database under the name ["bytes"]. *)
val t : t Bap_byteweight_signatures.data
159+
158160

159161
(** [find mem ~length ~threshold corpus] extract addresses of all
160162
memory chunks of the specified [length], that were classified

lib/bap_byteweight/bap_byteweight_signatures.ml

Lines changed: 136 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
open Core_kernel
2-
open Regular.Std
2+
open Bap_core_theory
33
open Bap.Std
4-
include Self()
54

65
module Config = Bap_main.Extension.Configuration
76

@@ -14,14 +13,62 @@ type error = [
1413
| `Sys_error of string
1514
]
1615

16+
17+
(* the descriptor of a kind of data that is stored in an entry *)
type 'a data = {
  name : string;        (* the last component of the entry name *)
  load : bytes -> 'a;   (* decodes the contents of an entry *)
  save : 'a -> bytes;   (* encodes a value for storing in an entry *)
}
22+
1723
exception Failed of error
1824

1925
let fail error = raise (Failed error)
20-
let zip_error entry err =
21-
fail (`Corrupted (sprintf "%s: %s" entry err))
22-
23-
let entry ?(comp="default") ~mode arch =
24-
Arch.to_string arch / comp / mode
26+
(* [corrupted entry err] is the [`Corrupted] error whose message is
   [entry] and [err] separated by ": ". *)
let corrupted entry err =
  let message = entry ^ ": " ^ err in
  `Corrupted message
27+
(* [zip_error entry err] raises [Failed] with the [`Corrupted] error
   built from [entry] and [err]. *)
let zip_error entry err = corrupted entry err |> fail
28+
29+
(* the name of the compiler as it is used in entry names; an
   unspecified compiler is spelled as "default". *)
let compiler_name = function
  | None -> "default"
  | Some compiler -> Theory.Compiler.name compiler
31+
32+
(* [matches_modulo_bits t name] holds when [Theory.Target.matching t name]
   finds a target and that target has the same [Theory.Target.bits]
   as [t]. *)
let matches_modulo_bits t name =
  Theory.Target.matching t name |>
  Option.exists ~f:(fun matched ->
      Int.equal (Theory.Target.bits t) (Theory.Target.bits matched))
36+
37+
(* [matching_entry ?compiler target data entry] is true when the name
   of [entry] has the form [<target>/<compiler>/<data>] and all three
   components agree with the arguments.

   Raises [Failed (`Corrupted _)] if the entry name doesn't consist of
   exactly three slash-separated components. *)
let matching_entry ?compiler target data entry =
  let filename = entry.Zip.filename in
  match String.split filename ~on:'/' with
  | [arch; comp; kind] ->
    matches_modulo_bits target arch
    && String.equal (compiler_name compiler) comp
    && String.equal data.name kind
  | _ -> fail (`Corrupted ("invalid entry name: " ^ filename))
44+
45+
(* [with_input file k] opens the archive [file] for reading, applies
   [k] to it, and closes the archive whether [k] returns or raises. *)
let with_input file k =
  Exn.protectx (Zip.open_in file) ~f:k ~finally:Zip.close_in
48+
49+
(* [with_output file k] opens the archive [file] for writing, applies
   [k] to it, and closes the archive whether [k] returns or raises. *)
let with_output file k =
  Exn.protectx (Zip.open_out file) ~f:k ~finally:Zip.close_out
52+
53+
(* [read_entry ?compiler target data file] decodes with [data.load]
   the first entry of the archive [file] that matches the key, or
   returns [None] if there is no such entry. *)
let read_entry ?compiler target data file =
  with_input file (fun zip ->
      let is_match = matching_entry ?compiler target data in
      List.find (Zip.entries zip) ~f:is_match |>
      Option.map ~f:(fun entry ->
          Zip.read_entry zip entry |> Bytes.of_string |> data.load))
60+
61+
(* [read_entries file] is the list of all entries of the archive
   [file], each paired with its contents. A file that doesn't exist
   is treated as an empty archive. *)
let read_entries file =
  match Sys.file_exists file with
  | false -> []
  | true ->
    with_input file (fun zip ->
        let with_contents entry = entry, Zip.read_entry zip entry in
        List.map (Zip.entries zip) ~f:with_contents)
67+
68+
(* the unqualified name of the target *)
let target_name target =
  KB.Name.unqualified (Theory.Target.name target)
69+
70+
(* [make_entry ?compiler target data] is the entry name of the form
   [<target>/<compiler>/<data>]. *)
let make_entry ?compiler target data =
  let arch_part = target_name target in
  let comp_part = compiler_name compiler in
  arch_part / comp_part / data.name
2572

2673
let make_path root = root / "signatures" / "byteweight.zip"
2774

@@ -31,14 +78,88 @@ let default_path = match Sys.getenv_opt "BAP_SIGFILE" with
3178
| Some path -> path
3279
| None -> make_path Config.datadir
3380

34-
let paths = [default_path; system_path]
35-
36-
let resolve_path user = match user with
81+
(* the built-in search locations, listed in the order in which they
   are tried: [default_path] first, then [system_path]. *)
let default_paths = [default_path; system_path]
82+
83+
(* [try_lookup ?paths ?compiler target data] searches [paths] and
   then [default_paths] and returns the first decoded entry that
   matches. Files that don't exist are skipped. Exceptions raised
   while reading an archive are not handled here. *)
let try_lookup ?(paths=[]) ?compiler target data =
  let search path = match Sys.file_exists path with
    | true -> read_entry ?compiler target data path
    | false -> None in
  List.find_map (paths @ default_paths) ~f:search
88+
89+
(* [of_exn exn] translates the exceptions that are expected from the
   storage operations into the [Error] result. Any other exception
   is re-raised. *)
let of_exn exn = match exn with
  | Failed err -> Error err
  | Sys_error msg -> Error (`Sys_error msg)
  | Zip.Error (_,ent,err) -> Error (corrupted ent err)
  | _ -> raise exn
94+
95+
(* [lookup ?paths ?compiler target data] is the result-returning
   version of [try_lookup]: a missing entry is reported as
   [`No_entry] with the name of the target, and the exceptions
   recognized by [of_exn] are turned into errors. *)
let lookup ?paths ?compiler target data =
  let found =
    try Ok (try_lookup ?paths ?compiler target data)
    with exn -> of_exn exn in
  match found with
  | Error problem -> Error problem
  | Ok None -> Error (`No_entry (target_name target))
  | Ok (Some payload) -> Ok payload
100+
101+
102+
(* [update_or_fail ?compiler target data payload path] rewrites the
   archive at [path]: the entries that match the key are dropped, a
   new entry with the encoded [payload] is written first, and all
   other entries are written back with their original extra field,
   comment, and modification time.

   The old entries are read in full before the archive is opened for
   writing. Errors are reported as exceptions. *)
let update_or_fail ?compiler target data payload path =
  let is_stale (entry,_) = matching_entry ?compiler target data entry in
  let kept = List.filter (read_entries path) ~f:(Fn.non is_stale) in
  with_output path (fun zip ->
      let name = make_entry ?compiler target data in
      let contents = Bytes.unsafe_to_string (data.save payload) in
      Zip.add_entry contents zip name;
      List.iter kept ~f:(fun (entry,contents) ->
          let {Zip.filename; extra; comment; mtime; _} = entry in
          Zip.add_entry contents zip filename ~extra ~comment ~mtime))
114+
115+
(* [copy input output] writes everything that can be read from
   [input] to [output], in chunks of at most 0x1000 bytes.

   A read may return fewer bytes than requested without being at the
   end of the input, so the loop stops only when a read returns zero
   bytes. *)
let copy input output =
  let len = 0x1000 in
  let buf = Bytes.create len in
  let rec loop () =
    match In_channel.input input ~buf ~pos:0 ~len with
    | 0 -> ()
    | read ->
      Out_channel.output output ~buf ~pos:0 ~len:read;
      loop () in
  loop ()
123+
124+
(* [temporary_copy file] copies [file] into a fresh temporary file
   and returns the name of the copy.

   The temporary file is opened in binary mode, since the copied
   data is an archive and must not undergo any newline translation.
   The output channel is closed on every path. If the copy fails,
   the temporary file is removed and the exception is re-raised, so
   that a failure leaves neither an open channel nor a stray file
   behind. *)
let temporary_copy file =
  let tmp,output =
    Caml.Filename.open_temp_file ~mode:[Caml.Open_binary]
      "byteweight" "copy" in
  try
    protect ~finally:(fun () -> Out_channel.close output) ~f:(fun () ->
        In_channel.with_file file ~f:(fun input -> copy input output));
    tmp
  with exn ->
    Sys.remove tmp;
    raise exn
129+
130+
(* [update ?compiler target data payload path] replaces the entries of
   the archive at [path] that match the key with a single entry that
   holds [payload].

   The modification is applied to a temporary copy of the archive,
   which is then renamed over [path]. Consequently [path] is touched
   only after the new archive is completely written, and on failure
   it is left intact and the temporary copy is removed.

   All failures recognized by [of_exn], including the failure to
   create the temporary copy, are returned as [Error].

   NOTE(review): [Sys.rename] may fail if the temporary directory and
   [path] reside on different file systems - confirm whether this
   needs a fallback. *)
let update ?compiler target data payload path =
  match temporary_copy path with
  | exception exn -> of_exn exn
  | tmp ->
    try
      (* write to the copy, not to [path] itself, otherwise the
         rename below would overwrite the result with the old data *)
      update_or_fail ?compiler target data payload tmp;
      Sys.rename tmp path;
      Ok ()
    with exn ->
      Sys.remove tmp;
      of_exn exn
139+
140+
module Data = struct
  (* the names of all data types declared so far *)
  let registry = Hash_set.create (module String)

  (* [declare ~load ~save name] registers [name] and returns the
     descriptor with the given codec. Fails if [name] is already
     registered. *)
  let declare ~load ~save name =
    match Hash_set.mem registry name with
    | true ->
      failwithf "The byteweight data type named %S is \
                 already registered, please pick another name"
        name ()
    | false ->
      Hash_set.add registry name;
      {name; load; save}
end
151+
152+
(* the old deprecated implementation *)
153+
154+
(* [resolve_path user] is the first existing file among the optional
   [user] path followed by [default_paths].

   Raises [Failed `No_signatures] if none of them exists. *)
let resolve_path user =
  let candidates = Option.to_list user @ default_paths in
  match List.find candidates ~f:Sys.file_exists with
  | Some existing -> existing
  | None -> fail `No_signatures
38-
| None ->
39-
match List.find paths ~f:Sys.file_exists with
40-
| Some path -> path
41-
| None -> fail `No_signatures
160+
161+
(* [entry ?comp ~mode arch] is the entry name of the form
   [<arch>/<comp>/<mode>], where [comp] defaults to "default". *)
let entry ?(comp="default") ~mode arch =
  let arch_name = Arch.to_string arch in
  arch_name / comp / mode
42163

43164
let load_exn ?comp ?path ~mode arch =
44165
let path = resolve_path path in
@@ -48,7 +169,7 @@ let load_exn ?comp ?path ~mode arch =
48169
let entry_path = entry ?comp ~mode arch in
49170
let r = try
50171
let entry = Zip.find_entry zip entry_path in
51-
Ok (Zip.read_entry zip entry |> Bytes.of_string)
172+
Ok (Zip.read_entry zip entry |> Caml.Bytes.unsafe_of_string)
52173
with Caml.Not_found -> fail (`No_entry entry_path)
53174
| Zip.Error (_,ent,err) -> zip_error ent err in
54175
Zip.close_in zip;

lib/bap_byteweight/bap_byteweight_signatures.mli

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
1-
(** Provides signatures storage *)
1+
(** Interface to the unified storage of signatures.
2+
3+
The signatures are key-value pairs (entries) located in one or more
4+
archives. Keys are target/compiler descriptions and values are
5+
arbitrary data.
6+
7+
The data types of the signature are described with the [Data]
8+
module. This library doesn't specify any data types of signature
9+
values and they are commonly provided by the libraries that define
10+
those data types, e.g., [Bap_byteweight.Bytes].
11+
*)
12+
213
open Core_kernel
3-
open Regular.Std
14+
open Bap_core_theory
415
open Bap.Std
516

617
(** Error conditions *)
@@ -11,16 +22,77 @@ type error = [
1122
| `Sys_error of string (** System error has occurred *)
1223
]
1324

25+
(** the descriptor of the data type stored in the signature entry.
26+
27+
@since 2.5.0
28+
*)
29+
type 'a data
30+
31+
32+
(** [lookup t f] looks up the matching entry in the signature database.
33+
34+
The search is performed over the [paths] list that is a list of
35+
filenames. The first matching entry is selected. If a file in the
36+
[paths] list doesn't exist then it is skipped. If it exists but
37+
is unreadable, an error is returned.
38+
39+
The paths list is always extended with [[default_path; system_path]],
40+
in that specific order.
41+
42+
If [compiler] is specified, then only entries that list a matching
43+
compiler will be selected.
44+
45+
The target matches are performed with the [Theory.Target.matching]
46+
function and additionally require equal [Theory.Target.bits].
47+
48+
@since 2.5.0
49+
*)
50+
val lookup :
51+
?paths:string list ->
52+
?compiler:Theory.compiler ->
53+
Theory.Target.t -> 'a data -> ('a, error) Result.t
54+
55+
56+
(** [update t f x path] updates or creates an entry in the signature database.
57+
58+
Removes all entries that match with the specified compiler,
59+
target, and data type and adds a new entry with the provided
60+
data. All unmatching entries are preserved.
61+
62+
@since 2.5.0
63+
*)
64+
val update :
65+
?compiler:Theory.compiler ->
66+
Theory.Target.t -> 'a data -> 'a -> string -> (unit,error) Result.t
67+
68+
69+
(** Interface for declaring signature database data types. *)
70+
module Data : sig
71+
72+
(** [declare ~load ~save name] declares a new mode.
73+
74+
The [load] and [save] functions are used to store the mode
75+
information in the signatures database.
76+
77+
Raises an exception if the mode name is not unique.
78+
*)
79+
val declare :
80+
load:(bytes -> 'a) ->
81+
save:('a -> bytes) ->
82+
string -> 'a data
83+
end
1484

1585
(** [save ?comp ~mode ~path arch data] store signatures data in the
16-
database of signatures specified by the [path] parameter. The
17-
triple [arch-comp-mode] defines a key for the created entry. If an
86+
database of signatures specified by the [path] parameter.
87+
88+
89+
The triple [arch-comp-mode] defines a key for the created entry. If an
1890
entry with the same name existed, then it would be overwritten
1991
with the new data. If the database doesn't exist, then it will be
2092
created at the specified destination.*)
2193
val save : ?comp:string -> mode:string -> path:string -> arch -> bytes ->
  (unit,error) Result.t
[@@deprecated "since 2022-02 use [update]"]
2496

2597
(** [load ?comp ?path ~mode arch] finds a signature for the specified
2698
[arch-comp-mode] triple.
@@ -33,6 +105,7 @@ val save : ?comp:string -> mode:string -> path:string -> arch -> bytes ->
33105
*)
34106
val load : ?comp:string -> ?path:string -> mode:string -> arch ->
  (bytes,error) Result.t
[@@deprecated "since 2022-02 use [lookup]"]
36109

37110

38111
(** default path for the user's signatures database.

oasis/byteweight

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,17 @@ Library bap_byteweight
88
Build$: flag(everything) || flag(byteweight)
99
CompiledObject: best
1010
Modules: Bap_byteweight, Bap_byteweight_signatures
11-
BuildDepends: bap, bap-main, core_kernel, uri, regular, camlzip, ppx_bap
11+
BuildDepends: bap, bap-main, bap-core-theory, bap-knowledge,
12+
core_kernel, uri, camlzip, ppx_bap
1213

1314
Library byteweight_plugin
1415
Path: plugins/byteweight
1516
FindlibName: bap-plugin-byteweight
1617
Build$: flag(everything) || flag(byteweight)
1718
CompiledObject: best
18-
BuildDepends: bap, bap-byteweight, core_kernel, regular, ppx_bap, bap-future
19+
BuildDepends: bap, bap-byteweight, core_kernel, ppx_bap,
20+
bitvec, bitvec-order,
21+
bap-knowledge, bap-core-theory, bap-main
1922
InternalModules: Byteweight_main
2023
XMETADescription: find function starts using Byteweight algorithm
2124
XMETAExtraLines: tags="pass, rooter"

plugins/byteweight/.merlin

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
PKG cmdliner
21
REC
3-
B ../../_build/lib/bap_byteweight
2+
B ../../_build/lib/bap_byteweight
3+
B ../../lib/bap_byteweight

0 commit comments

Comments
 (0)