Skip to content

Commit 399c3ed

Browse files
committed
[herd] More efficient computation of atomic load X stores pairs
We take inspiration from the efficient computation of register read-from (see PR #1704): atomic stores by a given thread and to a given location are ordered according to (extended by iico) program order and an atomic load is paired with the closest atomic store that follows it. Additionaly it is checked that there is no atomic load in-between. That is, the next effect in (generalised) po indeed is a write.
1 parent 2daa1fb commit 399c3ed

File tree

3 files changed

+137
-63
lines changed

3 files changed

+137
-63
lines changed

herd/mem.ml

Lines changed: 106 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -864,28 +864,45 @@ and get_written e = match E.written_of e with
864864
(* Compute rfmap for registers *)
865865
(*******************************)
866866

867-
let map_loc_find loc m =
868-
try U.LocEnv.find loc m
869-
with Not_found -> []
867+
let map_loc_find loc m = U.LocEnv.safe_find [] loc m
868+
869+
(* Event set ordered by to (generalised) program order *)
870+
871+
module
872+
EvtSetByPo
873+
(I:sig val es : S.event_structure end) =
874+
struct
875+
876+
let is_before_strict = U.is_before_strict I.es
877+
878+
include Set.Make
879+
(struct
880+
881+
type t = E.event
882+
883+
let compare e1 e2 =
884+
if is_before_strict e1 e2 then -1
885+
else if is_before_strict e2 e1 then 1
886+
else
887+
let () =
888+
Printf.eprintf "Not ordered stores %a and %a\n" E.debug_event e1
889+
E.debug_event e2
890+
in
891+
assert false
892+
893+
end)
894+
895+
let find_last_before set e =
896+
find_last_opt (fun e' -> is_before_strict e' e) set
897+
898+
let find_first_after e set =
899+
find_first_opt (fun e' -> is_before_strict e e') set
900+
901+
end
870902

871903
let match_reg_events add_eq es csn =
872904
let loc_loads_stores = U.collect_reg_loads_stores es in
873-
let is_before_strict = U.is_before_strict es in
874-
let compare e1 e2 =
875-
if is_before_strict e1 e2 then -1
876-
else if is_before_strict e2 e1 then 1
877-
else
878-
let () =
879-
Printf.eprintf "Not ordered stores %a and %a\n" E.debug_event e1
880-
E.debug_event e2
881-
in
882-
assert false
883-
in
884-
let module StoreSet = Set.Make (struct
885-
type t = E.event
886-
887-
let compare = compare
888-
end) in
905+
let module StoreSet = EvtSetByPo(struct let es = es end) in
889906
let add wt rf (rfm, csn) = (S.RFMap.add wt rf rfm, add_eq rfm wt rf csn) in
890907
(* For all loads find the right store, the one "just before" the load *)
891908
U.LocEnv.fold
@@ -901,9 +918,7 @@ let match_reg_events add_eq es csn =
901918
(* Add the corresponding store for each load *)
902919
List.fold_left
903920
(fun k load ->
904-
let f e = is_before_strict e load in
905-
let rf =
906-
match StoreSet.find_last_opt f stores with
921+
let rf = match StoreSet.find_last_before stores load with
907922
| Some store -> S.Store store
908923
| None -> S.Init
909924
in
@@ -1801,40 +1816,74 @@ let get_rf_value test read =
18011816
| _,_ -> k)
18021817
rfm E.EventRel.empty
18031818

1804-
(* Reconstruct load/store atomic pairs *)
1819+
(*
1820+
* Reconstruct load/store atomic pairs,
1821+
* By definition such a pair exist when the
1822+
* store precedes the load in generalised program order
1823+
* (_i.e._ the union of program order and of iico), and
1824+
* that there is no atomic effect to the same location
1825+
* in-between (w.r.t generalised po) the load and the store.
1826+
* Computation proceeds as follows:
1827+
* First, atomic events are grouped first by thread
1828+
* and then by location. Then, to each atomic load, we
1829+
* associate the closest generalised po successor store,
1830+
* by using a set of stores ordered by generalised po.
1831+
* We additionally check that no atomic load exists
1832+
* between the load and store. Notice that it is not possible
1833+
* to use a set of all atomic events (by a given thread and
1834+
* with a given location) ordered by po because some atomic loads
1835+
* may be unrelated.
1836+
* Finally, such atomic pairs can be spurious, that is not performed
1837+
* by a specific thread. In that case, pairs are given
1838+
* simply by the intra causality data relation.
1839+
*)
18051840

18061841
let make_atomic_load_store es =
1807-
let all = E.atomics_of es.E.events in
1808-
let atms = U.collect_atomics es in
1809-
U.LocEnv.fold
1810-
(fun _loc atms k ->
1811-
let atms =
1812-
List.filter
1813-
(fun e -> not (E.is_load e && E.is_store e))
1814-
atms in (* get rid of C RMW *)
1815-
let rs,ws = List.partition E.is_load atms in
1816-
List.fold_left
1817-
(fun k r ->
1818-
let exp = E.is_explicit r in
1819-
List.fold_left
1820-
(fun k w ->
1821-
if
1822-
S.atomic_pair_allowed r w &&
1823-
U.is_before_strict es r w &&
1824-
E.is_explicit w = exp &&
1825-
not
1826-
(E.EventSet.exists
1827-
(fun e ->
1828-
E.is_explicit e = exp &&
1829-
U.is_before_strict es r e &&
1830-
U.is_before_strict es e w)
1831-
all)
1832-
then E.EventRel.add (r,w) k
1833-
else k)
1834-
k ws)
1835-
k rs)
1836-
atms E.EventRel.empty
1837-
1842+
let atms,spurious = U.collect_atomics es in
1843+
let module StoreSet = EvtSetByPo(struct let es = es end) in
1844+
let make_atomic_pairs es k =
1845+
let rs,ws = List.partition E.is_load es in
1846+
let ws = StoreSet.of_list ws
1847+
and intervening_read er ew =
1848+
List.exists
1849+
(fun e ->
1850+
StoreSet.is_before_strict er e
1851+
&& StoreSet.is_before_strict e ew)
1852+
rs in
1853+
List.fold_left
1854+
(fun k er ->
1855+
match StoreSet.find_first_after er ws with
1856+
| Some ew ->
1857+
if
1858+
S.atomic_pair_allowed er ew
1859+
&& not (intervening_read er ew)
1860+
then
1861+
E.EventRel.add (er,ew) k
1862+
else k
1863+
| None -> k)
1864+
k rs in
1865+
let r1 =
1866+
List.map
1867+
(fun (_,m) ->
1868+
U.LocEnv.fold
1869+
(fun _loc es k ->
1870+
let exps,nexps = List.partition E.is_explicit es in
1871+
make_atomic_pairs exps @@ make_atomic_pairs nexps k)
1872+
m E.EventRel.empty)
1873+
atms |> E.EventRel.unions
1874+
and r2 =
1875+
let iico = es.E.intra_causality_data in
1876+
List.map
1877+
(fun e ->
1878+
if E.is_load e then
1879+
match
1880+
E.EventRel.succs iico e |> E.EventSet.as_singleton
1881+
with
1882+
| None -> assert false (* spurious updates are by pairs *)
1883+
| Some w -> E.EventRel.singleton (e,w)
1884+
else E.EventRel.empty)
1885+
spurious |> E.EventRel.unions in
1886+
E.EventRel.union r1 r2
18381887

18391888
(* Retrieve last store from rfmap *)
18401889
let get_max_store _test _es rfm loc =
@@ -1929,12 +1978,9 @@ let get_rf_value test read =
19291978
if C.debug.Debug_herd.mem then begin
19301979
eprintf "Observed locs: {%s}\n" (pp_locations observed_locs)
19311980
end ;
1932-
U.LocEnv.fold
1933-
(fun loc ws k ->
1934-
if keep_observed_loc loc observed_locs then
1935-
U.LocEnv.add loc ws k
1936-
else k)
1937-
loc_stores U.LocEnv.empty
1981+
U.LocEnv.filter
1982+
(fun loc _ -> keep_observed_loc loc observed_locs)
1983+
loc_stores
19381984
else loc_stores in
19391985

19401986
let possible_finals =

herd/memUtils.ml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,9 +387,25 @@ let lift_proc_info i evts =
387387
and collect_stores es = collect_by_loc es E.is_store
388388
and collect_loads_non_spec es = collect_by_loc es (fun e -> E.is_load e && not_speculated es e)
389389
and collect_stores_non_spec es = collect_by_loc es (fun e -> E.is_store e && not_speculated es e)
390-
and collect_atomics es = collect_by_loc es E.is_atomic
391390
and collect_reg_loads_stores es = collect_by_loc2 es E.is_reg_load_any E.is_reg_store_any
392391

392+
let accumulate_loc_proc proc loc e =
393+
IntMap.update proc @@ function
394+
| None -> Some (LocEnv.singleton loc [e])
395+
| Some m -> Some (LocEnv.accumulate loc e m)
396+
397+
let collect_atomics es =
398+
let m,sp =
399+
E.EventSet.fold
400+
(fun e (m,sp as k) ->
401+
if E.is_atomic e then
402+
match E.proc_of e with
403+
| None -> if E.is_spurious e then (m, e::sp) else k
404+
| Some proc -> accumulate_loc_proc proc (get_loc e) e m, sp
405+
else k)
406+
es.E.events (IntMap.empty,[]) in
407+
IntMap.bindings m,sp
408+
393409
let partition_events es =
394410
let env =
395411
E.EventSet.fold

herd/memUtils.mli

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ module Make : functor (S: SemExtra.S) -> sig
7070
relation *)
7171
val make_fr : S.concrete -> S.event_rel -> S.event_rel
7272

73-
(* Mapping from locations *)
73+
(* Mapping from locations, a.k.a. location maps *)
7474
module LocEnv : MyMap.S with type key = S.location
7575

7676
(* Collect various events, indexed by location *)
@@ -85,7 +85,19 @@ module Make : functor (S: SemExtra.S) -> sig
8585
val collect_stores : S.event_structure -> S.event list LocEnv.t
8686
val collect_stores_non_spec : S.event_structure -> S.event list LocEnv.t
8787
val collect_loads_non_spec : S.event_structure -> S.event list LocEnv.t
88-
val collect_atomics : S.event_structure -> S.event list LocEnv.t
88+
89+
(*
90+
* Collect atomic effects indexed by threads and by locations.
91+
* When given an event structure as argument, the function
92+
* returns a pair [(maps,evts)], where:
93+
* + [maps] is a list of location maps, one per thread.
94+
* The values of this map are the atomic effects
95+
of the given thread.
96+
* + [evts] is the list of spurious (atomic) effects.
97+
*)
98+
99+
val collect_atomics :
100+
S.event_structure -> (Proc.t * S.event list LocEnv.t) list * S.event list
89101

90102
(* Partition by location *)
91103
val partition_events : S.event_set -> S.event_set list

0 commit comments

Comments
 (0)