Skip to content

Commit c90eb12

Browse files
authored
Merge pull request #1076 from nathanschmidt/null-byte-arrayDomain
Null Byte Array Domain
2 parents 18a9aac + 4577879 commit c90eb12

File tree

15 files changed

+2012
-118
lines changed

15 files changed

+2012
-118
lines changed

src/analyses/base.ml

Lines changed: 116 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1417,7 +1417,7 @@ struct
14171417
(** [set st addr val] returns a state where [addr] is set to [val]
14181418
* it is always ok to put None for lval_raw and rval_raw, this amounts to not using/maintaining
14191419
* precise information about arrays. *)
1420-
let set (a: Q.ask) ~(ctx: _ ctx) ?(invariant=false) ?lval_raw ?rval_raw ?t_override (gs:glob_fun) (st: store) (lval: AD.t) (lval_type: Cil.typ) (value: value) : store =
1420+
let set (a: Q.ask) ~(ctx: _ ctx) ?(invariant=false) ?(blob_destructive=false) ?lval_raw ?rval_raw ?t_override (gs:glob_fun) (st: store) (lval: AD.t) (lval_type: Cil.typ) (value: value) : store =
14211421
let update_variable x t y z =
14221422
if M.tracing then M.tracel "set" ~var:x.vname "update_variable: start '%s' '%a'\nto\n%a\n\n" x.vname VD.pretty y CPA.pretty z;
14231423
let r = update_variable x t y z in (* refers to defintion that is outside of set *)
@@ -1450,7 +1450,7 @@ struct
14501450
let update_offset old_value =
14511451
(* Projection globals to highest Precision *)
14521452
let projected_value = project_val (Queries.to_value_domain_ask a) None None value (is_global a x) in
1453-
let new_value = VD.update_offset (Queries.to_value_domain_ask a) old_value offs projected_value lval_raw ((Var x), cil_offset) t in
1453+
let new_value = VD.update_offset ~blob_destructive (Queries.to_value_domain_ask a) old_value offs projected_value lval_raw ((Var x), cil_offset) t in
14541454
if WeakUpdates.mem x st.weak then
14551455
VD.join old_value new_value
14561456
else if invariant then (
@@ -2189,24 +2189,90 @@ struct
21892189
(* do nothing if all characters are needed *)
21902190
| _ -> None
21912191
in
2192-
let string_manipulation s1 s2 lv all op =
2193-
let s1_a, s1_typ = addr_type_of_exp s1 in
2194-
let s2_a, s2_typ = addr_type_of_exp s2 in
2195-
match lv, op with
2196-
| Some lv_val, Some f ->
2197-
(* when whished types coincide, compute result of operation op, otherwise use top *)
2198-
let lv_a = eval_lv (Analyses.ask_of_ctx ctx) gs st lv_val in
2199-
let lv_typ = Cilfacade.typeOfLval lv_val in
2200-
if all && typeSig s1_typ = typeSig s2_typ && typeSig s2_typ = typeSig lv_typ then (* all types need to coincide *)
2201-
lv_a, lv_typ, (f s1_a s2_a)
2202-
else if not all && typeSig s1_typ = typeSig s2_typ then (* only the types of s1 and s2 need to coincide *)
2203-
lv_a, lv_typ, (f s1_a s2_a)
2204-
else
2205-
lv_a, lv_typ, (VD.top_value (unrollType lv_typ))
2206-
| _ ->
2207-
(* check if s1 is potentially a string literal as writing to it would be undefined behavior; then return top *)
2208-
let _ = AD.string_writing_defined s1_a in
2209-
s1_a, s1_typ, VD.top_value (unrollType s1_typ)
2192+
let address_from_value (v:value) = match v with
2193+
| Address a ->
2194+
(* TODO: is it fine to just drop the last index unconditionally? https://github.com/goblint/analyzer/pull/1076#discussion_r1408975611 *)
2195+
let rec lo = function
2196+
| `Index (i, `NoOffset) -> `NoOffset
2197+
| `NoOffset -> `NoOffset
2198+
| `Field (f, o) -> `Field (f, lo o)
2199+
| `Index (i, o) -> `Index (i, lo o) in
2200+
let rmLastOffset = function
2201+
| Addr.Addr (v, o) -> Addr.Addr (v, lo o)
2202+
| other -> other in
2203+
AD.map rmLastOffset a
2204+
| _ -> raise (Failure "String function: not an address")
2205+
in
2206+
let string_manipulation s1 s2 lv all op_addr op_array =
2207+
let s1_v = eval_rv (Analyses.ask_of_ctx ctx) gs st s1 in
2208+
let s1_a = address_from_value s1_v in
2209+
let s1_typ = AD.type_of s1_a in
2210+
let s2_v = eval_rv (Analyses.ask_of_ctx ctx) gs st s2 in
2211+
let s2_a = address_from_value s2_v in
2212+
let s2_typ = AD.type_of s2_a in
2213+
(* compute value in string literals domain if s1 and s2 are both string literals *)
2214+
(* TODO: is this reliable? there could be a char* which isn't StrPtr *)
2215+
if CilType.Typ.equal s1_typ charPtrType && CilType.Typ.equal s2_typ charPtrType then
2216+
begin match lv, op_addr with
2217+
| Some lv_val, Some f ->
2218+
(* when whished types coincide, compute result of operation op_addr, otherwise use top *)
2219+
let lv_a = eval_lv (Analyses.ask_of_ctx ctx) gs st lv_val in
2220+
let lv_typ = Cilfacade.typeOfLval lv_val in
2221+
if all && typeSig s1_typ = typeSig s2_typ && typeSig s2_typ = typeSig lv_typ then (* all types need to coincide *)
2222+
set ~ctx (Analyses.ask_of_ctx ctx) gs st lv_a lv_typ (f s1_a s2_a)
2223+
else if not all && typeSig s1_typ = typeSig s2_typ then (* only the types of s1 and s2 need to coincide *)
2224+
set ~ctx (Analyses.ask_of_ctx ctx) gs st lv_a lv_typ (f s1_a s2_a)
2225+
else
2226+
set ~ctx (Analyses.ask_of_ctx ctx) gs st lv_a lv_typ (VD.top_value (unrollType lv_typ))
2227+
| _ ->
2228+
(* check if s1 is potentially a string literal as writing to it would be undefined behavior; then return top *)
2229+
let _ = AD.string_writing_defined s1_a in
2230+
set ~ctx (Analyses.ask_of_ctx ctx) gs st s1_a s1_typ (VD.top_value (unrollType s1_typ))
2231+
end
2232+
(* else compute value in array domain *)
2233+
else
2234+
let lv_a, lv_typ = match lv with
2235+
| Some lv_val -> eval_lv (Analyses.ask_of_ctx ctx) gs st lv_val, Cilfacade.typeOfLval lv_val
2236+
| None -> s1_a, s1_typ in
2237+
begin match (get (Analyses.ask_of_ctx ctx) gs st s1_a None), get (Analyses.ask_of_ctx ctx) gs st s2_a None with
2238+
| Array array_s1, Array array_s2 -> set ~ctx ~blob_destructive:true (Analyses.ask_of_ctx ctx) gs st lv_a lv_typ (op_array array_s1 array_s2)
2239+
| Array array_s1, _ when CilType.Typ.equal s2_typ charPtrType ->
2240+
let s2_null_bytes = List.map CArrays.to_null_byte_domain (AD.to_string s2_a) in
2241+
let array_s2 = List.fold_left CArrays.join (CArrays.bot ()) s2_null_bytes in
2242+
set ~ctx ~blob_destructive:true (Analyses.ask_of_ctx ctx) gs st lv_a lv_typ (op_array array_s1 array_s2)
2243+
| Bot, Array array_s2 ->
2244+
(* If we have bot inside here, we assume the blob is used as a char array and create one inside *)
2245+
let ptrdiff_ik = Cilfacade.ptrdiff_ikind () in
2246+
let size = ctx.ask (Q.BlobSize {exp = s1; base_address = false}) in
2247+
let s_id =
2248+
try ValueDomainQueries.ID.unlift (ID.cast_to ptrdiff_ik) size
2249+
with Failure _ -> ID.top_of ptrdiff_ik in
2250+
let empty_array = CArrays.make s_id (Int (ID.top_of IChar)) in
2251+
set ~ctx (Analyses.ask_of_ctx ctx) gs st lv_a lv_typ (op_array empty_array array_s2)
2252+
| Bot , _ when CilType.Typ.equal s2_typ charPtrType ->
2253+
(* If we have bot inside here, we assume the blob is used as a char array and create one inside *)
2254+
let ptrdiff_ik = Cilfacade.ptrdiff_ikind () in
2255+
let size = ctx.ask (Q.BlobSize {exp = s1; base_address = false}) in
2256+
let s_id =
2257+
try ValueDomainQueries.ID.unlift (ID.cast_to ptrdiff_ik) size
2258+
with Failure _ -> ID.top_of ptrdiff_ik in
2259+
let empty_array = CArrays.make s_id (Int (ID.top_of IChar)) in
2260+
let s2_null_bytes = List.map CArrays.to_null_byte_domain (AD.to_string s2_a) in
2261+
let array_s2 = List.fold_left CArrays.join (CArrays.bot ()) s2_null_bytes in
2262+
set ~ctx (Analyses.ask_of_ctx ctx) gs st lv_a lv_typ (op_array empty_array array_s2)
2263+
| _, Array array_s2 when CilType.Typ.equal s1_typ charPtrType ->
2264+
(* if s1 is string literal, str(n)cpy and str(n)cat are undefined *)
2265+
if op_addr = None then
2266+
(* triggers warning, function only evaluated for side-effects *)
2267+
let _ = AD.string_writing_defined s1_a in
2268+
set ~ctx (Analyses.ask_of_ctx ctx) gs st s1_a s1_typ (VD.top_value (unrollType s1_typ))
2269+
else
2270+
let s1_null_bytes = List.map CArrays.to_null_byte_domain (AD.to_string s1_a) in
2271+
let array_s1 = List.fold_left CArrays.join (CArrays.bot ()) s1_null_bytes in
2272+
set ~ctx (Analyses.ask_of_ctx ctx) gs st lv_a lv_typ (op_array array_s1 array_s2)
2273+
| _ ->
2274+
set ~ctx (Analyses.ask_of_ctx ctx) gs st lv_a lv_typ (VD.top_value (unrollType lv_typ))
2275+
end
22102276
in
22112277
let st = match desc.special args, f.vname with
22122278
| Memset { dest; ch; count; }, _ ->
@@ -2229,53 +2295,51 @@ struct
22292295
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value
22302296
| Memcpy { dest = dst; src; n; }, _ -> (* TODO: use n *)
22312297
memory_copying dst src (Some n)
2232-
(* strcpy(dest, src); *)
2233-
| Strcpy { dest = dst; src; n = None }, _ ->
2234-
let dest_a, dest_typ = addr_type_of_exp dst in
2235-
(* when dest surely isn't a string literal, try copying src to dest *)
2236-
if AD.string_writing_defined dest_a then
2237-
memory_copying dst src None
2238-
else
2239-
(* else return top (after a warning was issued) *)
2240-
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ (VD.top_value (unrollType dest_typ))
2241-
(* strncpy(dest, src, n); *)
2242-
| Strcpy { dest = dst; src; n }, _ ->
2243-
begin match eval_n n with
2244-
| Some num ->
2245-
let dest_a, dest_typ, value = string_manipulation dst src None false None in
2246-
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value
2247-
| None -> failwith "already handled in case above"
2248-
end
2249-
| Strcat { dest = dst; src; n }, _ ->
2250-
let dest_a, dest_typ, value = string_manipulation dst src None false None in
2251-
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value
2298+
| Strcpy { dest = dst; src; n }, _ -> string_manipulation dst src None false None (fun ar1 ar2 -> Array (CArrays.string_copy ar1 ar2 (eval_n n)))
2299+
| Strcat { dest = dst; src; n }, _ -> string_manipulation dst src None false None (fun ar1 ar2 -> Array (CArrays.string_concat ar1 ar2 (eval_n n)))
22522300
| Strlen s, _ ->
22532301
begin match lv with
22542302
| Some lv_val ->
22552303
let dest_a = eval_lv (Analyses.ask_of_ctx ctx) gs st lv_val in
22562304
let dest_typ = Cilfacade.typeOfLval lv_val in
2257-
let lval = mkMem ~addr:(Cil.stripCasts s) ~off:NoOffset in
2258-
let address = eval_lv (Analyses.ask_of_ctx ctx) gs st lval in
2259-
let (value:value) = Int(AD.to_string_length address) in
2305+
let v = eval_rv (Analyses.ask_of_ctx ctx) gs st s in
2306+
let a = address_from_value v in
2307+
let value:value =
2308+
(* if s string literal, compute strlen in string literals domain *)
2309+
(* TODO: is this reliable? there could be a char* which isn't StrPtr *)
2310+
if CilType.Typ.equal (AD.type_of a) charPtrType then
2311+
Int (AD.to_string_length a)
2312+
(* else compute strlen in array domain *)
2313+
else
2314+
begin match get (Analyses.ask_of_ctx ctx) gs st a None with
2315+
| Array array_s -> Int (CArrays.to_string_length array_s)
2316+
| _ -> VD.top_value (unrollType dest_typ)
2317+
end in
22602318
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value
22612319
| None -> st
22622320
end
22632321
| Strstr { haystack; needle }, _ ->
22642322
begin match lv with
2265-
| Some _ ->
2266-
(* when haystack, needle and dest type coincide, check if needle is a substring of haystack:
2267-
if that is the case, assign the substring of haystack starting at the first occurrence of needle to dest,
2268-
else use top *)
2269-
let dest_a, dest_typ, value = string_manipulation haystack needle lv true (Some (fun h_a n_a -> Address(AD.substring_extraction h_a n_a))) in
2270-
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value
2323+
| Some lv_val ->
2324+
(* check if needle is a substring of haystack in string literals domain if haystack and needle are string literals,
2325+
else check in null bytes domain if both haystack and needle are / can be transformed to an array domain representation;
2326+
if needle is substring, assign the substring of haystack starting at the first occurrence of needle to dest,
2327+
if it surely isn't, assign a null_ptr *)
2328+
string_manipulation haystack needle lv true (Some (fun h_a n_a -> Address (AD.substring_extraction h_a n_a)))
2329+
(fun h_ar n_ar -> match CArrays.substring_extraction h_ar n_ar with
2330+
| CArrays.IsNotSubstr -> Address (AD.null_ptr)
2331+
| CArrays.IsSubstrAtIndex0 -> Address (eval_lv (Analyses.ask_of_ctx ctx) gs st (mkMem ~addr:(Cil.stripCasts haystack) ~off:NoOffset))
2332+
| CArrays.IsMaybeSubstr -> Address (AD.join (eval_lv (Analyses.ask_of_ctx ctx) gs st
2333+
(mkMem ~addr:(Cil.stripCasts haystack) ~off:(Index (Offset.Index.Exp.any, NoOffset)))) (AD.null_ptr)))
22712334
| None -> st
22722335
end
22732336
| Strcmp { s1; s2; n }, _ ->
22742337
begin match lv with
22752338
| Some _ ->
2276-
(* when s1 and s2 type coincide, compare both both strings completely or their first n characters, otherwise use top *)
2277-
let dest_a, dest_typ, value = string_manipulation s1 s2 lv false (Some (fun s1_a s2_a -> Int(AD.string_comparison s1_a s2_a (eval_n n)))) in
2278-
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value
2339+
(* when s1 and s2 are string literals, compare both completely or their first n characters in the string literals domain;
2340+
else compare them in the null bytes array domain if they are / can be transformed to an array domain representation *)
2341+
string_manipulation s1 s2 lv false (Some (fun s1_a s2_a -> Int (AD.string_comparison s1_a s2_a (eval_n n))))
2342+
(fun s1_ar s2_ar -> Int (CArrays.string_comparison s1_ar s2_ar (eval_n n)))
22792343
| None -> st
22802344
end
22812345
| Abort, _ -> raise Deadcode

0 commit comments

Comments
 (0)