Skip to content

Commit a4054d2

Browse files
authored
[optimization] do not store empty objects in the knowledge base (#1411)
Improves performance by not storing empty objects (objects without values) in the knowledge base. When an object is created, we just increment the id of the last created object instead of storing it in the heap of objects. This improves performance and memory footprint, and creating a new object no longer requires any additional memory in the knowledge base. As a side effect, object identifiers are never reused, so there is no possibility of unwanted aliasing. It will also make it much easier to implement a KB garbage collector or tree shaker if we ever need them. The optimization yields about a 10% improvement in both memory and time consumption, and the overall performance improvement since 2.4.0 is about 50%. The canonical representation of the knowledge base is changed, but in a backward-compatible manner, so that old knowledge bases should be read correctly and will be updated, if necessary, to the new format. This performance optimization is a tradeoff between expending the object space and the overall performance of BAP: it comes at the cost of using more object identifiers (and we have plenty of them in the 2^60 space, so we will run out of storage, either RAM or HDD, long before we run out of identifiers). The story might look different with the 32-bit OCaml word, though.
1 parent c64a77f commit a4054d2

File tree

2 files changed

+55
-101
lines changed

2 files changed

+55
-101
lines changed

lib/knowledge/bap_knowledge.ml

Lines changed: 54 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -2039,23 +2039,21 @@ module Knowledge = struct
20392039
type work = Done | Work of workers
20402040

20412041
type objects = {
2042+
last : Oid.t;
20422043
vals : Record.t Oid.Map.t;
20432044
comp : work Map.M(Name).t Oid.Map.t;
20442045
syms : fullname Oid.Map.t;
2045-
heap : cell Oid.Map.t;
2046-
data : Oid.t Cell.Map.t;
20472046
objs : Oid.t String.Map.t String.Map.t;
20482047
pubs : Oid.Set.t String.Map.t;
20492048
}
20502049

20512050
let empty_class = {
2051+
last = Oid.first_atom;
20522052
vals = Map.empty (module Oid);
20532053
comp = Map.empty (module Oid);
20542054
objs = Map.empty (module String);
20552055
syms = Map.empty (module Oid);
20562056
pubs = Map.empty (module String);
2057-
heap = Map.empty (module Oid);
2058-
data = Map.empty (module Cell);
20592057
}
20602058

20612059
type t = {
@@ -2303,17 +2301,9 @@ module Knowledge = struct
23032301
type +'a t = 'a obj
23042302
type 'a ord = Oid.comparator_witness
23052303

2306-
let with_new_object objs f = match Map.max_elt objs.Env.vals with
2307-
| None -> f Oid.first_atom {
2308-
objs
2309-
with vals = Map.singleton (module Oid) Oid.first_atom Record.empty
2310-
}
2311-
| Some (key,_) ->
2312-
let key = Oid.next key in
2313-
f key {
2314-
objs
2315-
with vals = Map.add_exn objs.vals ~key ~data:Record.empty
2316-
}
2304+
let with_new_object objs f =
2305+
let next = Oid.next objs.Env.last in
2306+
f next {objs with Env.last = next}
23172307

23182308
let create : ('a,_) cls -> 'a obj Knowledge.t = fun cls ->
23192309
objects cls >>= fun objs ->
@@ -2854,72 +2844,6 @@ module Knowledge = struct
28542844
let set_package name = update @@ fun s -> {s with package = name}
28552845
end
28562846

2857-
2858-
module Data : sig
2859-
type +'a t
2860-
type 'a ord
2861-
2862-
val atom : ('a,_) cls -> 'a obj -> 'a t knowledge
2863-
val cons : ('a,_) cls -> 'a t -> 'a t -> 'a t knowledge
2864-
2865-
val case : ('a,_) cls -> 'a t ->
2866-
null:'r knowledge ->
2867-
atom:('a obj -> 'r knowledge) ->
2868-
cons:('a t -> 'a t -> 'r knowledge) -> 'r knowledge
2869-
2870-
2871-
val id : 'a obj -> Int63.t
2872-
2873-
2874-
module type S = sig
2875-
type t [@@deriving sexp]
2876-
include Base.Comparable.S with type t := t
2877-
include Binable.S with type t := t
2878-
end
2879-
2880-
val derive : ('a,_) cls -> (module S
2881-
with type t = 'a t
2882-
and type comparator_witness = 'a ord)
2883-
end = struct
2884-
type +'a t = 'a obj
2885-
type 'a ord = Oid.comparator_witness
2886-
2887-
let atom _ x = Knowledge.return x
2888-
2889-
let add_cell {Class.name} objects oid cell =
2890-
let {Env.data; heap} = objects in
2891-
let data = Map.add_exn data ~key:cell ~data:oid in
2892-
let heap = Map.add_exn heap ~key:oid ~data:cell in
2893-
update (fun s -> {
2894-
s with classes = Map.set s.classes name {
2895-
objects with data; heap
2896-
}}) >>| fun () ->
2897-
oid
2898-
2899-
let cons cls car cdr =
2900-
let cell = {car; cdr} in
2901-
objects cls >>= function {data; heap} as s ->
2902-
match Map.find data cell with
2903-
| Some id -> Knowledge.return id
2904-
| None -> match Map.max_elt heap with
2905-
| None ->
2906-
add_cell cls s Oid.first_cell cell
2907-
| Some (id,_) ->
2908-
add_cell cls s (Oid.next id) cell
2909-
2910-
let case cls x ~null ~atom ~cons =
2911-
if Oid.is_null x then null else
2912-
if Oid.is_atom x || Oid.is_number x then atom x
2913-
else objects cls >>= fun {Env.heap} ->
2914-
let cell = Map.find_exn heap x in
2915-
cons cell.car cell.cdr
2916-
2917-
let id = Object.id
2918-
2919-
module type S = Object.S
2920-
let derive = Object.derive
2921-
end
2922-
29232847
module Syntax = struct
29242848
include Knowledge.Syntax
29252849
include Knowledge.Let
@@ -3040,7 +2964,7 @@ module Knowledge = struct
30402964
Format.fprintf ppf "@]";
30412965

30422966
module Io = struct
3043-
type version = V1 [@@deriving bin_io]
2967+
type version = V1 | V2 [@@deriving bin_io]
30442968

30452969
module List = Base.List
30462970

@@ -3051,12 +2975,14 @@ module Knowledge = struct
30512975
comp : Name.t list;
30522976
} [@@deriving bin_io]
30532977

3054-
type objects = data list [@@deriving bin_io]
3055-
type payload = (Name.t * objects) list [@@deriving bin_io]
2978+
type v1 = data list [@@deriving bin_io]
2979+
type v2 = Oid.t * v1 [@@deriving bin_io]
2980+
type 'a objects = 'a [@@deriving bin_io]
2981+
type 'a payload = (Name.t * 'a) list [@@deriving bin_io]
30562982

3057-
type canonical = {
2983+
type 'a canonical = {
30582984
version : version;
3059-
payload : payload;
2985+
payload : 'a payload;
30602986
} [@@deriving bin_io]
30612987

30622988
let magic = "CMU:KB"
@@ -3129,37 +3055,66 @@ module Knowledge = struct
31293055
| None -> []
31303056
| Some works -> Map.keys works
31313057

3132-
3133-
let to_canonical {Env.classes} =
3058+
let to_canonical {Env.classes} : v2 canonical =
31343059
let payload =
31353060
Map.to_alist classes |>
3136-
List.map ~f:(fun (cid, {Env.vals; syms; comp}) ->
3137-
cid,
3138-
Map.to_alist vals |> List.filter_map ~f:(fun (oid,value) ->
3061+
List.map ~f:(fun (cid, {Env.vals; syms; comp; last}) ->
3062+
let data = Map.to_alist vals |> List.filter_map ~f:(fun (oid,value) ->
31393063
let data = serialize_record value in
31403064
let sym = Map.find syms oid in
31413065
let comp = collect_comps comp oid in
31423066
if Array.is_empty data && Option.is_none sym
31433067
then None
3144-
else Some {key=oid; sym; data; comp})) in {
3068+
else Some {key=oid; sym; data; comp}) in
3069+
cid,(last,data)) in {
31453070
version = V1;
31463071
payload;
31473072
}
31483073

3149-
let of_canonical {payload} =
3074+
let init_last : state -> state = fun state -> {
3075+
state with
3076+
classes = Map.map state.classes ~f:(fun cls -> {
3077+
cls with
3078+
last = match Map.max_elt cls.vals with
3079+
| None -> cls.last
3080+
| Some (k,_) -> Oid.next k
3081+
})
3082+
}
3083+
3084+
let of_canonical_v1 {payload} =
31503085
let init = Map.empty (module Name) in
31513086
let classes =
31523087
List.fold payload ~init ~f:(fun state (cid,objs) ->
31533088
Map.add_exn state ~key:cid
31543089
~data:(List.fold objs ~f:add_object
31553090
~init:Env.empty_class)) in
3091+
init_last {empty with classes}
3092+
3093+
let of_canonical_v2 {payload} =
3094+
let init = Map.empty (module Name) in
3095+
let classes =
3096+
List.fold payload ~init ~f:(fun state (cid,(last,objs)) ->
3097+
let init = {
3098+
Env.empty_class with last
3099+
} in
3100+
Map.add_exn state ~key:cid
3101+
~data:(List.fold objs ~f:add_object
3102+
~init)) in
31563103
{empty with classes}
31573104

3105+
31583106
let of_bigstring data =
31593107
let pos_ref = ref (check_magic data) in
3160-
let V1 = bin_read_version data ~pos_ref in
3161-
let payload = bin_read_payload data ~pos_ref in
3162-
of_canonical {version=V1; payload}
3108+
let version = bin_read_version data ~pos_ref in
3109+
match version with
3110+
| V1 -> of_canonical_v1 {
3111+
version;
3112+
payload = bin_read_payload bin_read_v1 data ~pos_ref
3113+
}
3114+
| V2 -> of_canonical_v2 {
3115+
version;
3116+
payload = bin_read_payload bin_read_v2 data ~pos_ref
3117+
}
31633118

31643119
let load path =
31653120
let fd = Unix.openfile path Unix.[O_RDONLY] 0o400 in
@@ -3177,21 +3132,21 @@ module Knowledge = struct
31773132
let blit_canonical_to_bigstring repr buf =
31783133
Bigstring.From_string.blito ~src:magic ~dst:buf ();
31793134
let pos = String.length magic in
3180-
let _p = bin_write_canonical ~pos buf repr in
3135+
let _p = bin_write_canonical bin_write_v2 ~pos buf repr in
31813136
()
31823137

31833138
let to_bigstring state =
31843139
let repr = to_canonical state in
31853140
let size = String.length magic +
3186-
bin_size_canonical repr in
3141+
bin_size_canonical bin_size_v2 repr in
31873142
let data = Bigstring.create size in
31883143
blit_canonical_to_bigstring repr data;
31893144
data
31903145

31913146
let save state path =
31923147
let repr = to_canonical state in
31933148
let size = String.length magic +
3194-
bin_size_canonical repr in
3149+
bin_size_canonical bin_size_v2 repr in
31953150
let fd = Unix.openfile path Unix.[O_RDWR; O_CREAT; O_TRUNC] 0o660 in
31963151
try
31973152
let dim = [|size |]in

plugins/bil/bil_lifter.ml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,7 @@ module Brancher = struct
163163
let goto dst = ret Theory.Effect.Sort.jump dst
164164

165165
let jmp _ =
166-
KB.Object.create Theory.Program.cls >>= fun dst ->
167-
ret Theory.Effect.Sort.jump dst
166+
ret Theory.Effect.Sort.jump Theory.Label.null
168167

169168
let seq x y =
170169
x >>= fun x ->

0 commit comments

Comments
 (0)