@@ -1417,7 +1417,7 @@ struct
14171417 (* * [set st addr val] returns a state where [addr] is set to [val]
14181418 * it is always ok to put None for lval_raw and rval_raw, this amounts to not using/maintaining
14191419 * precise information about arrays. *)
1420- let set (a : Q.ask ) ~(ctx : _ ctx ) ?(invariant =false ) ?lval_raw ?rval_raw ?t_override (gs :glob_fun ) (st : store ) (lval : AD.t ) (lval_type : Cil.typ ) (value : value ) : store =
1420+ let set (a : Q.ask ) ~(ctx : _ ctx ) ?(invariant =false ) ?( blob_destructive = false ) ? lval_raw ?rval_raw ?t_override (gs :glob_fun ) (st : store ) (lval : AD.t ) (lval_type : Cil.typ ) (value : value ) : store =
14211421 let update_variable x t y z =
14221422 if M. tracing then M. tracel " set" ~var: x.vname " update_variable: start '%s' '%a'\n to\n %a\n\n " x.vname VD. pretty y CPA. pretty z;
14231423 let r = update_variable x t y z in (* refers to defintion that is outside of set *)
@@ -1450,7 +1450,7 @@ struct
14501450 let update_offset old_value =
14511451 (* Projection globals to highest Precision *)
14521452 let projected_value = project_val (Queries. to_value_domain_ask a) None None value (is_global a x) in
1453- let new_value = VD. update_offset (Queries. to_value_domain_ask a) old_value offs projected_value lval_raw ((Var x), cil_offset) t in
1453+ let new_value = VD. update_offset ~blob_destructive (Queries. to_value_domain_ask a) old_value offs projected_value lval_raw ((Var x), cil_offset) t in
14541454 if WeakUpdates. mem x st.weak then
14551455 VD. join old_value new_value
14561456 else if invariant then (
@@ -2189,24 +2189,90 @@ struct
21892189 (* do nothing if all characters are needed *)
21902190 | _ -> None
21912191 in
2192- let string_manipulation s1 s2 lv all op =
2193- let s1_a, s1_typ = addr_type_of_exp s1 in
2194- let s2_a, s2_typ = addr_type_of_exp s2 in
2195- match lv, op with
2196- | Some lv_val , Some f ->
2197- (* when whished types coincide, compute result of operation op, otherwise use top *)
2198- let lv_a = eval_lv (Analyses. ask_of_ctx ctx) gs st lv_val in
2199- let lv_typ = Cilfacade. typeOfLval lv_val in
2200- if all && typeSig s1_typ = typeSig s2_typ && typeSig s2_typ = typeSig lv_typ then (* all types need to coincide *)
2201- lv_a, lv_typ, (f s1_a s2_a)
2202- else if not all && typeSig s1_typ = typeSig s2_typ then (* only the types of s1 and s2 need to coincide *)
2203- lv_a, lv_typ, (f s1_a s2_a)
2204- else
2205- lv_a, lv_typ, (VD. top_value (unrollType lv_typ))
2206- | _ ->
2207- (* check if s1 is potentially a string literal as writing to it would be undefined behavior; then return top *)
2208- let _ = AD. string_writing_defined s1_a in
2209- s1_a, s1_typ, VD. top_value (unrollType s1_typ)
2192+ let address_from_value (v :value ) = match v with
2193+ | Address a ->
2194+ (* TODO: is it fine to just drop the last index unconditionally? https://github.com/goblint/analyzer/pull/1076#discussion_r1408975611 *)
2195+ let rec lo = function
2196+ | `Index (i , `NoOffset) -> `NoOffset
2197+ | `NoOffset -> `NoOffset
2198+ | `Field (f , o ) -> `Field (f, lo o)
2199+ | `Index (i , o ) -> `Index (i, lo o) in
2200+ let rmLastOffset = function
2201+ | Addr. Addr (v , o ) -> Addr. Addr (v, lo o)
2202+ | other -> other in
2203+ AD. map rmLastOffset a
2204+ | _ -> raise (Failure " String function: not an address" )
2205+ in
2206+ let string_manipulation s1 s2 lv all op_addr op_array =
2207+ let s1_v = eval_rv (Analyses. ask_of_ctx ctx) gs st s1 in
2208+ let s1_a = address_from_value s1_v in
2209+ let s1_typ = AD. type_of s1_a in
2210+ let s2_v = eval_rv (Analyses. ask_of_ctx ctx) gs st s2 in
2211+ let s2_a = address_from_value s2_v in
2212+ let s2_typ = AD. type_of s2_a in
2213+ (* compute value in string literals domain if s1 and s2 are both string literals *)
2214+ (* TODO: is this reliable? there could be a char* which isn't StrPtr *)
2215+ if CilType.Typ. equal s1_typ charPtrType && CilType.Typ. equal s2_typ charPtrType then
2216+ begin match lv, op_addr with
2217+ | Some lv_val , Some f ->
2218+ (* when whished types coincide, compute result of operation op_addr, otherwise use top *)
2219+ let lv_a = eval_lv (Analyses. ask_of_ctx ctx) gs st lv_val in
2220+ let lv_typ = Cilfacade. typeOfLval lv_val in
2221+ if all && typeSig s1_typ = typeSig s2_typ && typeSig s2_typ = typeSig lv_typ then (* all types need to coincide *)
2222+ set ~ctx (Analyses. ask_of_ctx ctx) gs st lv_a lv_typ (f s1_a s2_a)
2223+ else if not all && typeSig s1_typ = typeSig s2_typ then (* only the types of s1 and s2 need to coincide *)
2224+ set ~ctx (Analyses. ask_of_ctx ctx) gs st lv_a lv_typ (f s1_a s2_a)
2225+ else
2226+ set ~ctx (Analyses. ask_of_ctx ctx) gs st lv_a lv_typ (VD. top_value (unrollType lv_typ))
2227+ | _ ->
2228+ (* check if s1 is potentially a string literal as writing to it would be undefined behavior; then return top *)
2229+ let _ = AD. string_writing_defined s1_a in
2230+ set ~ctx (Analyses. ask_of_ctx ctx) gs st s1_a s1_typ (VD. top_value (unrollType s1_typ))
2231+ end
2232+ (* else compute value in array domain *)
2233+ else
2234+ let lv_a, lv_typ = match lv with
2235+ | Some lv_val -> eval_lv (Analyses. ask_of_ctx ctx) gs st lv_val, Cilfacade. typeOfLval lv_val
2236+ | None -> s1_a, s1_typ in
2237+ begin match (get (Analyses. ask_of_ctx ctx) gs st s1_a None ), get (Analyses. ask_of_ctx ctx) gs st s2_a None with
2238+ | Array array_s1 , Array array_s2 -> set ~ctx ~blob_destructive: true (Analyses. ask_of_ctx ctx) gs st lv_a lv_typ (op_array array_s1 array_s2)
2239+ | Array array_s1 , _ when CilType.Typ. equal s2_typ charPtrType ->
2240+ let s2_null_bytes = List. map CArrays. to_null_byte_domain (AD. to_string s2_a) in
2241+ let array_s2 = List. fold_left CArrays. join (CArrays. bot () ) s2_null_bytes in
2242+ set ~ctx ~blob_destructive: true (Analyses. ask_of_ctx ctx) gs st lv_a lv_typ (op_array array_s1 array_s2)
2243+ | Bot , Array array_s2 ->
2244+ (* If we have bot inside here, we assume the blob is used as a char array and create one inside *)
2245+ let ptrdiff_ik = Cilfacade. ptrdiff_ikind () in
2246+ let size = ctx.ask (Q. BlobSize {exp = s1; base_address = false }) in
2247+ let s_id =
2248+ try ValueDomainQueries.ID. unlift (ID. cast_to ptrdiff_ik) size
2249+ with Failure _ -> ID. top_of ptrdiff_ik in
2250+ let empty_array = CArrays. make s_id (Int (ID. top_of IChar )) in
2251+ set ~ctx (Analyses. ask_of_ctx ctx) gs st lv_a lv_typ (op_array empty_array array_s2)
2252+ | Bot , _ when CilType.Typ. equal s2_typ charPtrType ->
2253+ (* If we have bot inside here, we assume the blob is used as a char array and create one inside *)
2254+ let ptrdiff_ik = Cilfacade. ptrdiff_ikind () in
2255+ let size = ctx.ask (Q. BlobSize {exp = s1; base_address = false }) in
2256+ let s_id =
2257+ try ValueDomainQueries.ID. unlift (ID. cast_to ptrdiff_ik) size
2258+ with Failure _ -> ID. top_of ptrdiff_ik in
2259+ let empty_array = CArrays. make s_id (Int (ID. top_of IChar )) in
2260+ let s2_null_bytes = List. map CArrays. to_null_byte_domain (AD. to_string s2_a) in
2261+ let array_s2 = List. fold_left CArrays. join (CArrays. bot () ) s2_null_bytes in
2262+ set ~ctx (Analyses. ask_of_ctx ctx) gs st lv_a lv_typ (op_array empty_array array_s2)
2263+ | _ , Array array_s2 when CilType.Typ. equal s1_typ charPtrType ->
2264+ (* if s1 is string literal, str(n)cpy and str(n)cat are undefined *)
2265+ if op_addr = None then
2266+ (* triggers warning, function only evaluated for side-effects *)
2267+ let _ = AD. string_writing_defined s1_a in
2268+ set ~ctx (Analyses. ask_of_ctx ctx) gs st s1_a s1_typ (VD. top_value (unrollType s1_typ))
2269+ else
2270+ let s1_null_bytes = List. map CArrays. to_null_byte_domain (AD. to_string s1_a) in
2271+ let array_s1 = List. fold_left CArrays. join (CArrays. bot () ) s1_null_bytes in
2272+ set ~ctx (Analyses. ask_of_ctx ctx) gs st lv_a lv_typ (op_array array_s1 array_s2)
2273+ | _ ->
2274+ set ~ctx (Analyses. ask_of_ctx ctx) gs st lv_a lv_typ (VD. top_value (unrollType lv_typ))
2275+ end
22102276 in
22112277 let st = match desc.special args, f.vname with
22122278 | Memset { dest; ch; count; } , _ ->
@@ -2229,53 +2295,51 @@ struct
22292295 set ~ctx (Analyses. ask_of_ctx ctx) gs st dest_a dest_typ value
22302296 | Memcpy { dest = dst ; src; n; } , _ -> (* TODO: use n *)
22312297 memory_copying dst src (Some n)
2232- (* strcpy(dest, src); *)
2233- | Strcpy { dest = dst ; src; n = None } , _ ->
2234- let dest_a, dest_typ = addr_type_of_exp dst in
2235- (* when dest surely isn't a string literal, try copying src to dest *)
2236- if AD. string_writing_defined dest_a then
2237- memory_copying dst src None
2238- else
2239- (* else return top (after a warning was issued) *)
2240- set ~ctx (Analyses. ask_of_ctx ctx) gs st dest_a dest_typ (VD. top_value (unrollType dest_typ))
2241- (* strncpy(dest, src, n); *)
2242- | Strcpy { dest = dst ; src; n } , _ ->
2243- begin match eval_n n with
2244- | Some num ->
2245- let dest_a, dest_typ, value = string_manipulation dst src None false None in
2246- set ~ctx (Analyses. ask_of_ctx ctx) gs st dest_a dest_typ value
2247- | None -> failwith " already handled in case above"
2248- end
2249- | Strcat { dest = dst ; src; n } , _ ->
2250- let dest_a, dest_typ, value = string_manipulation dst src None false None in
2251- set ~ctx (Analyses. ask_of_ctx ctx) gs st dest_a dest_typ value
2298+ | Strcpy { dest = dst ; src; n } , _ -> string_manipulation dst src None false None (fun ar1 ar2 -> Array (CArrays. string_copy ar1 ar2 (eval_n n)))
2299+ | Strcat { dest = dst ; src; n } , _ -> string_manipulation dst src None false None (fun ar1 ar2 -> Array (CArrays. string_concat ar1 ar2 (eval_n n)))
22522300 | Strlen s , _ ->
22532301 begin match lv with
22542302 | Some lv_val ->
22552303 let dest_a = eval_lv (Analyses. ask_of_ctx ctx) gs st lv_val in
22562304 let dest_typ = Cilfacade. typeOfLval lv_val in
2257- let lval = mkMem ~addr: (Cil. stripCasts s) ~off: NoOffset in
2258- let address = eval_lv (Analyses. ask_of_ctx ctx) gs st lval in
2259- let (value:value ) = Int (AD. to_string_length address) in
2305+ let v = eval_rv (Analyses. ask_of_ctx ctx) gs st s in
2306+ let a = address_from_value v in
2307+ let value:value =
2308+ (* if s string literal, compute strlen in string literals domain *)
2309+ (* TODO: is this reliable? there could be a char* which isn't StrPtr *)
2310+ if CilType.Typ. equal (AD. type_of a) charPtrType then
2311+ Int (AD. to_string_length a)
2312+ (* else compute strlen in array domain *)
2313+ else
2314+ begin match get (Analyses. ask_of_ctx ctx) gs st a None with
2315+ | Array array_s -> Int (CArrays. to_string_length array_s)
2316+ | _ -> VD. top_value (unrollType dest_typ)
2317+ end in
22602318 set ~ctx (Analyses. ask_of_ctx ctx) gs st dest_a dest_typ value
22612319 | None -> st
22622320 end
22632321 | Strstr { haystack; needle } , _ ->
22642322 begin match lv with
2265- | Some _ ->
2266- (* when haystack, needle and dest type coincide, check if needle is a substring of haystack:
2267- if that is the case, assign the substring of haystack starting at the first occurrence of needle to dest,
2268- else use top *)
2269- let dest_a, dest_typ, value = string_manipulation haystack needle lv true (Some (fun h_a n_a -> Address (AD. substring_extraction h_a n_a))) in
2270- set ~ctx (Analyses. ask_of_ctx ctx) gs st dest_a dest_typ value
2323+ | Some lv_val ->
2324+ (* check if needle is a substring of haystack in string literals domain if haystack and needle are string literals,
2325+ else check in null bytes domain if both haystack and needle are / can be transformed to an array domain representation;
2326+ if needle is substring, assign the substring of haystack starting at the first occurrence of needle to dest,
2327+ if it surely isn't, assign a null_ptr *)
2328+ string_manipulation haystack needle lv true (Some (fun h_a n_a -> Address (AD. substring_extraction h_a n_a)))
2329+ (fun h_ar n_ar -> match CArrays. substring_extraction h_ar n_ar with
2330+ | CArrays. IsNotSubstr -> Address (AD. null_ptr)
2331+ | CArrays. IsSubstrAtIndex0 -> Address (eval_lv (Analyses. ask_of_ctx ctx) gs st (mkMem ~addr: (Cil. stripCasts haystack) ~off: NoOffset ))
2332+ | CArrays. IsMaybeSubstr -> Address (AD. join (eval_lv (Analyses. ask_of_ctx ctx) gs st
2333+ (mkMem ~addr: (Cil. stripCasts haystack) ~off: (Index (Offset.Index.Exp. any, NoOffset )))) (AD. null_ptr)))
22712334 | None -> st
22722335 end
22732336 | Strcmp { s1; s2; n } , _ ->
22742337 begin match lv with
22752338 | Some _ ->
2276- (* when s1 and s2 type coincide, compare both both strings completely or their first n characters, otherwise use top *)
2277- let dest_a, dest_typ, value = string_manipulation s1 s2 lv false (Some (fun s1_a s2_a -> Int (AD. string_comparison s1_a s2_a (eval_n n)))) in
2278- set ~ctx (Analyses. ask_of_ctx ctx) gs st dest_a dest_typ value
2339+ (* when s1 and s2 are string literals, compare both completely or their first n characters in the string literals domain;
2340+ else compare them in the null bytes array domain if they are / can be transformed to an array domain representation *)
2341+ string_manipulation s1 s2 lv false (Some (fun s1_a s2_a -> Int (AD. string_comparison s1_a s2_a (eval_n n))))
2342+ (fun s1_ar s2_ar -> Int (CArrays. string_comparison s1_ar s2_ar (eval_n n)))
22792343 | None -> st
22802344 end
22812345 | Abort , _ -> raise Deadcode
0 commit comments