Skip to content

Commit af44a24

Browse files
committed
[herd] More efficient computation of atomic load X stores pairs
We take inspiration from the efficient computation of register read-from (see PR #1704): atomic stores by a given thread and to a given location are ordered according to (extended by iico) program order and an atomic load is paired with the closest atomic store that follows it. Additionaly it is checked that there is no atomic load in-between. That is, the next effect in (generalised) po indeed is a write.
1 parent 86b798a commit af44a24

File tree

3 files changed

+103
-45
lines changed

3 files changed

+103
-45
lines changed

herd/mem.ml

Lines changed: 71 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -901,12 +901,10 @@ struct
901901

902902
let find_first_after e set =
903903
find_first_opt (fun e' -> is_before_strict e e') set
904-
[@@warning "-32"]
904+
905905
end
906906

907-
let map_loc_find loc m =
908-
try U.LocEnv.find loc m
909-
with Not_found -> []
907+
let map_loc_find loc m = U.LocEnv.safe_find [] loc m
910908

911909
let match_reg_events add_eq es csn =
912910
let loc_loads_stores = U.collect_reg_loads_stores es in
@@ -1827,40 +1825,74 @@ let get_rf_value test read =
18271825
| _,_ -> k)
18281826
rfm E.EventRel.empty
18291827

1830-
(* Reconstruct load/store atomic pairs *)
1828+
(*
1829+
* Reconstruct load/store atomic pairs,
1830+
* By definition such a pair exists when the
1831+
* store precedes the load in generalised program order
1832+
* (_i.e._ the union of program order and of iico), and
1833+
* that there is no atomic effect to the same location
1834+
* in-between (w.r.t generalised po) the load and the store.
1835+
* Computation proceeds as follows:
1836+
* First, atomic events are grouped first by thread
1837+
* and then by location. Then, to each atomic load, we
1838+
* associate the closest generalised po successor store,
1839+
* by using a set of stores ordered by generalised po.
1840+
* We additionally check that no atomic load exists
1841+
* between the load and store. Notice that it is not possible
1842+
* to use a set of all atomic events (by a given thread and
1843+
* with a given location) ordered by po because some atomic loads
1844+
* may be unrelated.
1845+
* Finally, such atomic pairs can be spurious, that is not performed
1846+
* by a specific thread. In that case, pairs are given
1847+
* simply by the intra causality data relation.
1848+
*)
18311849

18321850
let make_atomic_load_store es =
1833-
let all = E.atomics_of es.E.events in
1834-
let atms = U.collect_atomics es in
1835-
U.LocEnv.fold
1836-
(fun _loc atms k ->
1837-
let atms =
1838-
List.filter
1839-
(fun e -> not (E.is_load e && E.is_store e))
1840-
atms in (* get rid of C RMW *)
1841-
let rs,ws = List.partition E.is_load atms in
1842-
List.fold_left
1843-
(fun k r ->
1844-
let exp = E.is_explicit r in
1845-
List.fold_left
1846-
(fun k w ->
1847-
if
1848-
S.atomic_pair_allowed r w &&
1849-
U.is_before_strict es r w &&
1850-
E.is_explicit w = exp &&
1851-
not
1852-
(E.EventSet.exists
1853-
(fun e ->
1854-
E.is_explicit e = exp &&
1855-
U.is_before_strict es r e &&
1856-
U.is_before_strict es e w)
1857-
all)
1858-
then E.EventRel.add (r,w) k
1859-
else k)
1860-
k ws)
1861-
k rs)
1862-
atms E.EventRel.empty
1863-
1851+
let atms,spurious = U.collect_atomics es in
1852+
let module StoreSet = EvtSetByPo(struct let es = es end) in
1853+
let make_atomic_pairs es k =
1854+
let rs,ws = List.partition E.is_load es in
1855+
let ws = StoreSet.of_list ws
1856+
and intervening_read er ew =
1857+
List.exists
1858+
(fun e ->
1859+
StoreSet.is_before_strict er e
1860+
&& StoreSet.is_before_strict e ew)
1861+
rs in
1862+
List.fold_left
1863+
(fun k er ->
1864+
match StoreSet.find_first_after er ws with
1865+
| Some ew ->
1866+
if
1867+
S.atomic_pair_allowed er ew
1868+
&& not (intervening_read er ew)
1869+
then
1870+
E.EventRel.add (er,ew) k
1871+
else k
1872+
| None -> k)
1873+
k rs in
1874+
let r1 =
1875+
List.map
1876+
(fun (_,m) ->
1877+
U.LocEnv.fold
1878+
(fun _loc es k ->
1879+
let exps,nexps = List.partition E.is_explicit es in
1880+
make_atomic_pairs exps @@ make_atomic_pairs nexps k)
1881+
m E.EventRel.empty)
1882+
atms |> E.EventRel.unions
1883+
and r2 =
1884+
let iico = es.E.intra_causality_data in
1885+
List.map
1886+
(fun e ->
1887+
if E.is_load e then
1888+
match
1889+
E.EventRel.succs iico e |> E.EventSet.as_singleton
1890+
with
1891+
| None -> assert false (* spurious updates are by pairs *)
1892+
| Some w -> E.EventRel.singleton (e,w)
1893+
else E.EventRel.empty)
1894+
spurious |> E.EventRel.unions in
1895+
E.EventRel.union r1 r2
18641896

18651897
(* Retrieve last store from rfmap *)
18661898
let get_max_store _test _es rfm loc =
@@ -1955,12 +1987,9 @@ let get_rf_value test read =
19551987
if C.debug.Debug_herd.mem then begin
19561988
eprintf "Observed locs: {%s}\n" (pp_locations observed_locs)
19571989
end ;
1958-
U.LocEnv.fold
1959-
(fun loc ws k ->
1960-
if keep_observed_loc loc observed_locs then
1961-
U.LocEnv.add loc ws k
1962-
else k)
1963-
loc_stores U.LocEnv.empty
1990+
U.LocEnv.filter
1991+
(fun loc _ -> keep_observed_loc loc observed_locs)
1992+
loc_stores
19641993
else loc_stores in
19651994

19661995
let possible_finals =

herd/memUtils.ml

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,9 +387,26 @@ let lift_proc_info i evts =
387387
and collect_stores es = collect_by_loc es E.is_store
388388
and collect_loads_non_spec es = collect_by_loc es (fun e -> E.is_load e && not_speculated es e)
389389
and collect_stores_non_spec es = collect_by_loc es (fun e -> E.is_store e && not_speculated es e)
390-
and collect_atomics es = collect_by_loc es E.is_atomic
391390
and collect_reg_loads_stores es = collect_by_loc2 es E.is_reg_load_any E.is_reg_store_any
392391

392+
let accumulate_loc_proc proc loc e =
393+
IntMap.update proc @@
394+
function
395+
| None -> Some (LocEnv.singleton loc [e])
396+
| Some m -> Some (LocEnv.accumulate loc e m)
397+
398+
let collect_atomics es =
399+
let m,sp =
400+
E.EventSet.fold
401+
(fun e (m,sp as k) ->
402+
if E.is_atomic e then
403+
match E.proc_of e with
404+
| None -> if E.is_spurious e then (m, e::sp) else k
405+
| Some proc -> accumulate_loc_proc proc (get_loc e) e m, sp
406+
else k)
407+
es.E.events (IntMap.empty,[]) in
408+
IntMap.bindings m,sp
409+
393410
let partition_events es =
394411
let env =
395412
E.EventSet.fold

herd/memUtils.mli

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ module Make : functor (S: SemExtra.S) -> sig
7070
relation *)
7171
val make_fr : S.concrete -> S.event_rel -> S.event_rel
7272

73-
(* Mapping from locations *)
73+
(* Mapping from locations, a.k.a. location maps *)
7474
module LocEnv : MyMap.S with type key = S.location
7575

7676
(* Collect various events, indexed by location *)
@@ -85,7 +85,19 @@ module Make : functor (S: SemExtra.S) -> sig
8585
val collect_stores : S.event_structure -> S.event list LocEnv.t
8686
val collect_stores_non_spec : S.event_structure -> S.event list LocEnv.t
8787
val collect_loads_non_spec : S.event_structure -> S.event list LocEnv.t
88-
val collect_atomics : S.event_structure -> S.event list LocEnv.t
88+
89+
(*
90+
* Collect atomic effects indexed by threads and by locations.
91+
* When given an event structure as argument, the function
92+
* returns a pair [(maps,evts)], where:
93+
* + [maps] is a list of location maps, one per thread.
94+
* The values of this map are the atomic effects
95+
of the given thread.
96+
* + [evts] is the list of spurious (atomic) effects.
97+
*)
98+
99+
val collect_atomics :
100+
S.event_structure -> (Proc.t * S.event list LocEnv.t) list * S.event list
89101

90102
(* Partition by location *)
91103
val partition_events : S.event_set -> S.event_set list

0 commit comments

Comments
 (0)