Skip to content

Commit 4f17eab

Browse files
committed
docs(replications): add explanation of algorithm + diagrams (#43)
1 parent 10bd238 commit 4f17eab

File tree

6 files changed

+819
-2
lines changed

6 files changed

+819
-2
lines changed

images/replications_algorithm.drawio

Lines changed: 419 additions & 0 deletions
Large diffs are not rendered by default.

images/replications_algorithm.png

1.06 MB
Loading
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36" version="26.1.1">
2+
<diagram name="Page-1" id="ynTKS2v_TZv17swCPKiS">
3+
<mxGraphModel dx="1097" dy="831" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
4+
<root>
5+
<mxCell id="0" />
6+
<mxCell id="1" parent="0" />
7+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-42" value="" style="rounded=1;whiteSpace=wrap;html=1;strokeColor=none;fillColor=#EEEEEE;fontStyle=1;opacity=50;" parent="1" vertex="1">
8+
<mxGeometry x="336.5" y="40" width="543.5" height="436.5" as="geometry" />
9+
</mxCell>
10+
<mxCell id="ltitkqDnKHNBnyyiyaz9-67" value="" style="rounded=1;whiteSpace=wrap;html=1;strokeColor=none;fillColor=#EEEEEE;fontStyle=1;opacity=50;" parent="1" vertex="1">
11+
<mxGeometry x="40" y="111" width="270" height="565.5" as="geometry" />
12+
</mxCell>
13+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-6" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" source="ltitkqDnKHNBnyyiyaz9-6" target="q0k77vlWbUJt_Vhd_Qly-5" edge="1">
14+
<mxGeometry relative="1" as="geometry" />
15+
</mxCell>
16+
<mxCell id="ltitkqDnKHNBnyyiyaz9-6" value="&lt;font style=&quot;font-size: 18px;&quot;&gt;WelfordStats&lt;/font&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#68A9D4;" parent="1" vertex="1">
17+
<mxGeometry x="377.94" y="56.5" width="154.75" height="40" as="geometry" />
18+
</mxCell>
19+
<mxCell id="ltitkqDnKHNBnyyiyaz9-41" value="Key:" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=1;fontStyle=1;" parent="1" vertex="1">
20+
<mxGeometry x="756.5" y="495.5" width="45" height="25" as="geometry" />
21+
</mxCell>
22+
<mxCell id="ltitkqDnKHNBnyyiyaz9-43" value="Methods" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F2A25C;" parent="1" vertex="1">
23+
<mxGeometry x="711.5" y="608.5" width="135" height="30" as="geometry" />
24+
</mxCell>
25+
<mxCell id="ltitkqDnKHNBnyyiyaz9-44" value="Instance of R6 class" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#A9E4F5;" parent="1" vertex="1">
26+
<mxGeometry x="711.5" y="569.5" width="135" height="30" as="geometry" />
27+
</mxCell>
28+
<mxCell id="ltitkqDnKHNBnyyiyaz9-45" value="R6 class" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#68A9D4;" parent="1" vertex="1">
29+
<mxGeometry x="711.5" y="531" width="135" height="30" as="geometry" />
30+
</mxCell>
31+
<mxCell id="ltitkqDnKHNBnyyiyaz9-88" value="" style="curved=1;endArrow=classic;html=1;rounded=1;dashed=1;exitX=1;exitY=0;exitDx=0;exitDy=0;" parent="1" edge="1">
32+
<mxGeometry width="50" height="50" relative="1" as="geometry">
33+
<mxPoint x="292.8199999999998" y="505" as="sourcePoint" />
34+
<mxPoint x="620" y="366.5" as="targetPoint" />
35+
<Array as="points">
36+
<mxPoint x="660" y="526.5" />
37+
<mxPoint x="550" y="386.5" />
38+
</Array>
39+
</mxGeometry>
40+
</mxCell>
41+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-1" value="&lt;font style=&quot;font-size: 14px;&quot;&gt;new()&lt;/font&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F2A25C;" parent="1" vertex="1">
42+
<mxGeometry x="413.00999999999993" y="206.5" width="64.37" height="30" as="geometry" />
43+
</mxCell>
44+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-34" style="edgeStyle=orthogonalEdgeStyle;rounded=1;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0.5;exitDx=0;exitDy=0;entryX=0.75;entryY=0;entryDx=0;entryDy=0;curved=0;" parent="1" source="q0k77vlWbUJt_Vhd_Qly-3" target="q0k77vlWbUJt_Vhd_Qly-32" edge="1">
45+
<mxGeometry relative="1" as="geometry" />
46+
</mxCell>
47+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-3" value="&lt;font style=&quot;font-size: 14px;&quot;&gt;update()&lt;/font&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F2A25C;" parent="1" vertex="1">
48+
<mxGeometry x="404.41" y="356.5" width="81.56" height="30" as="geometry" />
49+
</mxCell>
50+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-7" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" source="q0k77vlWbUJt_Vhd_Qly-5" target="q0k77vlWbUJt_Vhd_Qly-1" edge="1">
51+
<mxGeometry relative="1" as="geometry" />
52+
</mxCell>
53+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-22" style="edgeStyle=orthogonalEdgeStyle;rounded=1;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;curved=0;" parent="1" source="q0k77vlWbUJt_Vhd_Qly-5" edge="1" target="q0k77vlWbUJt_Vhd_Qly-19">
54+
<mxGeometry relative="1" as="geometry">
55+
<mxPoint x="730" y="196.5" as="targetPoint" />
56+
</mxGeometry>
57+
</mxCell>
58+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-5" value="&lt;font style=&quot;font-size: 14px;&quot;&gt;stats&lt;/font&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#A9E4F5;" parent="1" vertex="1">
59+
<mxGeometry x="405.94" y="127.5" width="78.5" height="30" as="geometry" />
60+
</mxCell>
61+
<mxCell id="1YQfei4QBcS1-SSEy_-4-2" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;" edge="1" parent="1" source="q0k77vlWbUJt_Vhd_Qly-8" target="q0k77vlWbUJt_Vhd_Qly-3">
62+
<mxGeometry relative="1" as="geometry" />
63+
</mxCell>
64+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-8" value="Sets up class, including empty variables to store results, and calls update() for each value in data (if provided)" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=1;" parent="1" vertex="1">
65+
<mxGeometry x="357.69" y="242.5" width="175" height="58.5" as="geometry" />
66+
</mxCell>
67+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-10" value="Updates count and running calculation of mean and sum of squares.&lt;div&gt;Will call observer$update().&lt;/div&gt;" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=1;" parent="1" vertex="1">
68+
<mxGeometry x="336.5" y="386.5" width="217.38" height="50" as="geometry" />
69+
</mxCell>
70+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-12" value="&lt;span style=&quot;font-size: 14px;&quot;&gt;variance()&lt;/span&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F2A25C;" parent="1" vertex="1">
71+
<mxGeometry x="580" y="319" width="90" height="30" as="geometry" />
72+
</mxCell>
73+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-13" value="&lt;span style=&quot;font-size: 14px;&quot;&gt;std()&lt;/span&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F2A25C;" parent="1" vertex="1">
74+
<mxGeometry x="680.5" y="319" width="90" height="30" as="geometry" />
75+
</mxCell>
76+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-14" value="&lt;span style=&quot;font-size: 14px;&quot;&gt;std_error()&lt;/span&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F2A25C;" parent="1" vertex="1">
77+
<mxGeometry x="778" y="319" width="90" height="30" as="geometry" />
78+
</mxCell>
79+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-15" value="&lt;span style=&quot;font-size: 14px;&quot;&gt;half_width()&lt;/span&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F2A25C;" parent="1" vertex="1">
80+
<mxGeometry x="635.5" y="358" width="90" height="30" as="geometry" />
81+
</mxCell>
82+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-16" value="&lt;span style=&quot;font-size: 14px;&quot;&gt;lci()&lt;/span&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F2A25C;" parent="1" vertex="1">
83+
<mxGeometry x="734" y="357" width="90" height="30" as="geometry" />
84+
</mxCell>
85+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-17" value="&lt;span style=&quot;font-size: 14px;&quot;&gt;uci()&lt;/span&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F2A25C;" parent="1" vertex="1">
86+
<mxGeometry x="635.5" y="394" width="90" height="30" as="geometry" />
87+
</mxCell>
88+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-18" value="&lt;span style=&quot;font-size: 14px;&quot;&gt;deviation()&lt;/span&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F2A25C;" parent="1" vertex="1">
89+
<mxGeometry x="734" y="394" width="90" height="30" as="geometry" />
90+
</mxCell>
91+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-19" value="Can calculate various statistics based on the current count, mean and variance" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=1;" parent="1" vertex="1">
92+
<mxGeometry x="610.75" y="279" width="229.5" height="33" as="geometry" />
93+
</mxCell>
94+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-27" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" source="q0k77vlWbUJt_Vhd_Qly-24" target="q0k77vlWbUJt_Vhd_Qly-25" edge="1">
95+
<mxGeometry relative="1" as="geometry" />
96+
</mxCell>
97+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-24" value="&lt;font style=&quot;font-size: 18px;&quot;&gt;ReplicationTabuliser&lt;/font&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#68A9D4;" parent="1" vertex="1">
98+
<mxGeometry x="80" y="127.5" width="190" height="40" as="geometry" />
99+
</mxCell>
100+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-35" style="edgeStyle=orthogonalEdgeStyle;shape=connector;curved=0;rounded=1;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;strokeColor=default;align=center;verticalAlign=middle;fontFamily=Helvetica;fontSize=11;fontColor=default;labelBackgroundColor=default;endArrow=classic;" parent="1" source="q0k77vlWbUJt_Vhd_Qly-25" target="q0k77vlWbUJt_Vhd_Qly-29" edge="1">
101+
<mxGeometry relative="1" as="geometry" />
102+
</mxCell>
103+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-36" style="edgeStyle=orthogonalEdgeStyle;shape=connector;curved=0;rounded=1;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;strokeColor=default;align=center;verticalAlign=middle;fontFamily=Helvetica;fontSize=11;fontColor=default;labelBackgroundColor=default;endArrow=classic;" parent="1" source="q0k77vlWbUJt_Vhd_Qly-25" target="q0k77vlWbUJt_Vhd_Qly-32" edge="1">
104+
<mxGeometry relative="1" as="geometry">
105+
<Array as="points">
106+
<mxPoint x="166" y="376.5" />
107+
<mxPoint x="235" y="376.5" />
108+
</Array>
109+
</mxGeometry>
110+
</mxCell>
111+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-41" style="edgeStyle=orthogonalEdgeStyle;shape=connector;curved=0;rounded=1;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;strokeColor=default;align=center;verticalAlign=middle;fontFamily=Helvetica;fontSize=11;fontColor=default;labelBackgroundColor=default;endArrow=classic;" parent="1" source="q0k77vlWbUJt_Vhd_Qly-25" target="q0k77vlWbUJt_Vhd_Qly-37" edge="1">
112+
<mxGeometry relative="1" as="geometry">
113+
<Array as="points">
114+
<mxPoint x="166" y="556.5" />
115+
<mxPoint x="238" y="556.5" />
116+
</Array>
117+
</mxGeometry>
118+
</mxCell>
119+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-25" value="&lt;font style=&quot;font-size: 14px;&quot;&gt;observer&lt;/font&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#A9E4F5;" parent="1" vertex="1">
120+
<mxGeometry x="127.13" y="202" width="78.5" height="30" as="geometry" />
121+
</mxCell>
122+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-29" value="&lt;font style=&quot;font-size: 14px;&quot;&gt;new()&lt;/font&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F2A25C;" parent="1" vertex="1">
123+
<mxGeometry x="205.62999999999994" y="282" width="64.37" height="30" as="geometry" />
124+
</mxCell>
125+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-31" value="Creates empty lists to store results" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=1;" parent="1" vertex="1">
126+
<mxGeometry x="182.82" y="312" width="110" height="40" as="geometry" />
127+
</mxCell>
128+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-32" value="&lt;font style=&quot;font-size: 14px;&quot;&gt;update()&lt;/font&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F2A25C;" parent="1" vertex="1">
129+
<mxGeometry x="200" y="456.5" width="70" height="30" as="geometry" />
130+
</mxCell>
131+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-33" value="Add statistics from stats to the lists" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=1;" parent="1" vertex="1">
132+
<mxGeometry x="180" y="488" width="110" height="40" as="geometry" />
133+
</mxCell>
134+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-37" value="&lt;font style=&quot;font-size: 14px;&quot;&gt;summary_table()&lt;/font&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#F2A25C;" parent="1" vertex="1">
135+
<mxGeometry x="179.22999999999996" y="590" width="117.18" height="30" as="geometry" />
136+
</mxCell>
137+
<mxCell id="q0k77vlWbUJt_Vhd_Qly-39" value="Converts lists into a dataframe" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=1;" parent="1" vertex="1">
138+
<mxGeometry x="179.23000000000002" y="617" width="110" height="41.5" as="geometry" />
139+
</mxCell>
140+
</root>
141+
</mxGraphModel>
142+
</diagram>
143+
</mxfile>

images/replications_statistics.png

396 KB
Loading

rmarkdown/choosing_replications.Rmd

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,104 @@ plot_replication_ci(
191191
include_graphics(path)
192192
```
193193

194+
## Explanation of the automated method
195+
196+
This section walks through how the automation code is structured. The algorithm that determines the number of replications is `ReplicationsAlgorithm`. This depends on other R6 classes including `WelfordStats` and `ReplicationTabuliser`.
197+
198+
### WelfordStats
199+
200+
`WelfordStats` is designed to:
201+
202+
* Keep a **running mean and sum of squares**.
203+
* Return **other statistics** based on these (e.g. standard deviation, confidence intervals).
204+
* **Call the `update()`** method of `ReplicationTabuliser` whenever a new data point is processed by `WelfordStats`
205+
206+
#### How do the running mean and sum of squares calculations work?
207+
208+
The running mean and sum of squares are updated iteratively with each new data point provided, **without requiring the storage of all previous data points**. This approach can be referred to as "online" because we only need to store a small set of values (such as the current mean and sum of squares), rather than maintaining an entire list of past values.
209+
210+
For example, focusing on the mean, normally you would need to store all the data points in a list and sum them up to compute the average - for example:
211+
212+
```
213+
data_points <- c(1, 2, 3, 4, 5)
214+
mean <- sum(data_points) / length(data_points)
215+
```
216+
217+
This works fine for small datasets, but as the data grows, maintaining the entire list becomes impractical. Instead, we can update the mean without storing the previous data points using **Welford's online algorithm**. The formula for the running mean is:
218+
219+
$$
220+
\mu_n = \mu_{n-1} + \frac{x_n - \mu_{n-1}}{n}
221+
$$
222+
223+
Where:
224+
225+
- $\mu_n$ is the running mean after the $n$-th data point.
226+
- $x_n$ is the new data point.
227+
- $\mu_{n-1}$ is the running mean before the new data point.
228+
229+
The key thing to notice here is that, to update the mean, **all we needed to know was the current running mean, the new data point, and the number of data points**. A similar formula exists for calculating the sum of squares.
230+
231+
In our code, every time we call `update()` with a new data point, the mean and sum of squares are adjusted, with `n` keeping track of the number of data points so far - for example:
232+
233+
```
234+
WelfordStats <- R6Class("WelfordStats", list( # nolint: object_name_linter
235+
236+
n = 0L,
237+
mean = NA,
238+
...
239+
240+
update = function(x) {
241+
self$n <- self$n + 1L
242+
...
243+
updated_mean <- self$mean + ((x - self$mean) / self$n)
244+
...
245+
self$mean <- updated_meam
246+
...
247+
```
248+
249+
#### What other statistics can it calculate?
250+
251+
`WelfordStats` then has a series of methods which can return other statistics based on the current mean, sum of squares, and count:
252+
253+
* Variance
254+
* Standard deviation
255+
* Standard error
256+
* Half width of the confidence interval
257+
* Lower confidence interval bound
258+
* Upper confidence interval bound
259+
* Deviation of confidence interval from the mean
260+
261+
### ReplicationTabuliser
262+
263+
`ReplicationTabuliser` keeps track of our results. It:
264+
265+
* Stores **lists with various statistics**, which are updated whenever `update()` is called.
266+
* Can convert these into a **dataframe** using the `summary_table()` method.
267+
268+
![Interaction between WelfordStats and ReplicationTabuliser](../images/replications_statistics.png)
269+
270+
### ReplicationsAlgorithm
271+
272+
The diagram below is a visual representation of the logic in the **ReplicationsAlgorithm**.
273+
274+
Once set up with the relevant parameters, it will first check if there are **initial_replications** to run. These might be specified if the user knows that the model will need at least X amount of replications before any metrics start to get close to the desired precision. The benefit of specifying these is that they are run using **runner()** and so can be run in parallel if chosen.
275+
276+
Once these are run, it checks if any metrics meet precision already. Typically more replications will be required (for the length of the lookahead period) - but if there is no lookahead, they can be marked as solved.
277+
278+
> **What is the lookahead period?**
279+
>
280+
> We want to make sure that the desired precision is stable and maintained for several replications. Here, we refer to this as the lookahead period.
281+
>
282+
> The user will specify **look_ahead** - as noted in [sim-tools](https://tommonks.github.io/sim-tools/04_replications/01_automated_reps.html), this is recommended to be **5** by [Hoad et al. (2010)](https://www.jstor.org/stable/40926090).
283+
>
284+
> The algorithm contains a method **klimit()** which will scale up the lookahead if more than 100 replications have been run, to ensure a sufficient period is being checked for stability, relative to the number of replications. This is simply: `look_ahead/100 * replications`. For example, if we have run 200 replications and look_ahead is 5: `5/100 * 200 = 10`.
285+
286+
After any initial replications, the algorithm enters a while loop. This continues until all metrics are solved or the number of replications surpasses the user-specified **replication_budget** - whichever comes first!
287+
288+
With each loop, it runs the model for another replication, then updates the results for any unsolved metrics from this replication, and checks if precision is met. The **target_met** is a record of how many times in a row precision has been met - once this passes the lookahead period, the metric is marked as solved.
289+
290+
![Visual representation of logic in ReplicationsAlgorithm](../images/replications_algorithm.png)
291+
194292
## Run time
195293

196294
```{r end_timer}

0 commit comments

Comments
 (0)