Skip to content

Commit 0101ab7

Browse files
jeromekellehermergify[bot]
authored andcommitted
Change min_length to min_span in IBD APIs
Some minor doc updates also.
1 parent a12ef78 commit 0101ab7

File tree

10 files changed

+101
-91
lines changed

10 files changed

+101
-91
lines changed

c/tests/test_tables.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6292,7 +6292,7 @@ test_ibd_segments_empty_result(void)
62926292
}
62936293

62946294
static void
6295-
test_ibd_segments_min_length_max_time(void)
6295+
test_ibd_segments_min_span_max_time(void)
62966296
{
62976297
int ret;
62986298
tsk_treeseq_t ts;
@@ -9275,8 +9275,7 @@ main(int argc, char **argv)
92759275
test_ibd_segments_single_tree_options },
92769276
{ "test_ibd_segments_multiple_trees", test_ibd_segments_multiple_trees },
92779277
{ "test_ibd_segments_empty_result", test_ibd_segments_empty_result },
9278-
{ "test_ibd_segments_min_length_max_time",
9279-
test_ibd_segments_min_length_max_time },
9278+
{ "test_ibd_segments_min_span_max_time", test_ibd_segments_min_span_max_time },
92809279
{ "test_ibd_segments_single_tree_between",
92819280
test_ibd_segments_single_tree_between },
92829281
{ "test_ibd_segments_samples_are_descendants",

c/tskit/tables.c

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7634,7 +7634,7 @@ tsk_identity_segments_get(const tsk_identity_segments_t *self, tsk_id_t sample_a
76347634

76357635
typedef struct {
76367636
tsk_identity_segments_t *result;
7637-
double min_length;
7637+
double min_span;
76387638
double max_time;
76397639
const tsk_table_collection_t *tables;
76407640
/* Maps nodes to their sample set IDs. Input samples map to set 0
@@ -7755,14 +7755,14 @@ tsk_ibd_finder_add_sample_ancestry(tsk_ibd_finder_t *self)
77557755

77567756
static int TSK_WARN_UNUSED
77577757
tsk_ibd_finder_init(tsk_ibd_finder_t *self, const tsk_table_collection_t *tables,
7758-
tsk_identity_segments_t *result, double min_length, double max_time)
7758+
tsk_identity_segments_t *result, double min_span, double max_time)
77597759
{
77607760
int ret = 0;
77617761
tsk_size_t num_nodes;
77627762

77637763
tsk_memset(self, 0, sizeof(tsk_ibd_finder_t));
77647764

7765-
if (min_length < 0) {
7765+
if (min_span < 0) {
77667766
ret = TSK_ERR_BAD_PARAM_VALUE;
77677767
goto out;
77687768
}
@@ -7774,7 +7774,7 @@ tsk_ibd_finder_init(tsk_ibd_finder_t *self, const tsk_table_collection_t *tables
77747774
self->tables = tables;
77757775
self->result = result;
77767776
self->max_time = max_time;
7777-
self->min_length = min_length;
7777+
self->min_span = min_span;
77787778

77797779
ret = tsk_blkalloc_init(&self->segment_heap, 8192);
77807780
if (ret != 0) {
@@ -7807,7 +7807,7 @@ tsk_ibd_finder_enqueue_segment(
78077807
tsk_segment_t *seg;
78087808
void *p;
78097809

7810-
if ((right - left) > self->min_length) {
7810+
if ((right - left) > self->min_span) {
78117811
/* Make sure we always have room for one more segment in the queue so we
78127812
* can put a tail sentinel on it */
78137813
if (self->segment_queue_size == self->max_segment_queue_size - 1) {
@@ -7837,7 +7837,7 @@ tsk_ibd_finder_passes_filters(
78377837
if (a == b) {
78387838
return false;
78397839
}
7840-
if ((right - left) <= self->min_length) {
7840+
if ((right - left) <= self->min_span) {
78417841
return false;
78427842
}
78437843
if (self->finding_between) {
@@ -7901,7 +7901,7 @@ tsk_ibd_finder_print_state(tsk_ibd_finder_t *self, FILE *out)
79017901

79027902
fprintf(out, "--ibd-finder stats--\n");
79037903
fprintf(out, "max_time = %f\n", self->max_time);
7904-
fprintf(out, "min_length = %f\n", self->min_length);
7904+
fprintf(out, "min_span = %f\n", self->min_span);
79057905
fprintf(out, "finding_between = %d\n", self->finding_between);
79067906
fprintf(out, "===\nEdges\n===\n");
79077907
for (j = 0; j < self->tables->edges.num_rows; j++) {
@@ -10941,7 +10941,7 @@ tsk_table_collection_link_ancestors(tsk_table_collection_t *self, tsk_id_t *samp
1094110941
int TSK_WARN_UNUSED
1094210942
tsk_table_collection_ibd_within(const tsk_table_collection_t *self,
1094310943
tsk_identity_segments_t *result, const tsk_id_t *samples, tsk_size_t num_samples,
10944-
double min_length, double max_time, tsk_flags_t options)
10944+
double min_span, double max_time, tsk_flags_t options)
1094510945
{
1094610946
int ret = 0;
1094710947
tsk_ibd_finder_t ibd_finder;
@@ -10950,7 +10950,7 @@ tsk_table_collection_ibd_within(const tsk_table_collection_t *self,
1095010950
if (ret != 0) {
1095110951
goto out;
1095210952
}
10953-
ret = tsk_ibd_finder_init(&ibd_finder, self, result, min_length, max_time);
10953+
ret = tsk_ibd_finder_init(&ibd_finder, self, result, min_span, max_time);
1095410954
if (ret != 0) {
1095510955
goto out;
1095610956
}
@@ -10973,7 +10973,7 @@ tsk_table_collection_ibd_within(const tsk_table_collection_t *self,
1097310973
int TSK_WARN_UNUSED
1097410974
tsk_table_collection_ibd_between(const tsk_table_collection_t *self,
1097510975
tsk_identity_segments_t *result, tsk_size_t num_sample_sets,
10976-
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, double min_length,
10976+
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, double min_span,
1097710977
double max_time, tsk_flags_t options)
1097810978
{
1097910979
int ret = 0;
@@ -10983,7 +10983,7 @@ tsk_table_collection_ibd_between(const tsk_table_collection_t *self,
1098310983
if (ret != 0) {
1098410984
goto out;
1098510985
}
10986-
ret = tsk_ibd_finder_init(&ibd_finder, self, result, min_length, max_time);
10986+
ret = tsk_ibd_finder_init(&ibd_finder, self, result, min_span, max_time);
1098710987
if (ret != 0) {
1098810988
goto out;
1098910989
}

c/tskit/tables.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4139,11 +4139,11 @@ tsk_id_t tsk_table_collection_check_integrity(
41394139
* This should be done as part of documenting, I guess. */
41404140
int tsk_table_collection_ibd_within(const tsk_table_collection_t *self,
41414141
tsk_identity_segments_t *result, const tsk_id_t *samples, tsk_size_t num_samples,
4142-
double min_length, double max_time, tsk_flags_t options);
4142+
double min_span, double max_time, tsk_flags_t options);
41434143

41444144
int tsk_table_collection_ibd_between(const tsk_table_collection_t *self,
41454145
tsk_identity_segments_t *result, tsk_size_t num_sample_sets,
4146-
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, double min_length,
4146+
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, double min_span,
41474147
double max_time, tsk_flags_t options);
41484148

41494149
int tsk_table_collection_link_ancestors(tsk_table_collection_t *self, tsk_id_t *samples,

docs/ibd.md

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,17 @@ kernelspec:
1919

2020
# Identity by descent
2121

22-
The {meth}`.TreeSequence.ibd_segments` allows us to compute
22+
The {meth}`.TreeSequence.ibd_segments` method allows us to compute
2323
segments of identity by descent.
2424

2525
:::{note}
2626
This documentation page is preliminary
2727
:::
2828

29+
:::{todo}
30+
Relate the concept of identity by descent to the MRCA spans in the tree sequence.
31+
:::
32+
2933
## Examples
3034

3135
Let's take a simple tree sequence to illustrate the {meth}`.TreeSequence.ibd_segments`
@@ -78,8 +82,8 @@ segments returned are the longest possible ones.
7882
Consider the IBD segments that we get from our example tree sequence:
7983

8084
```{code-cell}
81-
segs = ts.ibd_segments(store_segments=True)
82-
for pair, segment_list in segs.items():
85+
segments = ts.ibd_segments(store_segments=True)
86+
for pair, segment_list in segments.items():
8387
print(pair, list(segment_list))
8488
```
8589

@@ -97,8 +101,8 @@ The result of calling {meth}`.TreeSequence.ibd_segments` is an
97101
{class}`.IdentitySegments` class:
98102

99103
```{code-cell}
100-
segs = ts.ibd_segments()
101-
print(segs)
104+
segments = ts.ibd_segments()
105+
print(segments)
102106
```
103107

104108
By default this class only stores the high-level summaries of the
@@ -121,27 +125,27 @@ needing to store the actual lists.
121125

122126

123127
```{code-cell}
124-
segs = ts.ibd_segments(store_pairs=True)
125-
for pair, value in segs.items():
128+
segments = ts.ibd_segments(store_pairs=True)
129+
for pair, value in segments.items():
126130
print(pair, "::", value)
127131
```
128132

129133
Now we can see the more detailed breakdown of how the identity segments
130134
are distributed among the sample pairs. The {class}`.IdentitySegments`
131-
class behaves like a dictionary, such that ``segs[(a, b)]`` will return
135+
class behaves like a dictionary, such that ``segments[(a, b)]`` will return
132136
the {class}`.IdentitySegmentList` instance for that pair of samples:
133137

134138
```{code-cell}
135-
seglist = segs[(0, 1)]
139+
seglist = segments[(0, 1)]
136140
print(seglist)
137141
```
138142

139143
If we want to access the detailed information about the actual
140144
identity segments, we must use the ``store_segments`` argument:
141145

142146
```{code-cell}
143-
segs = ts.ibd_segments(store_pairs=True, store_segments=True)
144-
segs[(0, 1)]
147+
segments = ts.ibd_segments(store_pairs=True, store_segments=True)
148+
segments[(0, 1)]
145149
```
146150

147151
The {class}`.IdentitySegmentList` behaves like a Python list,
@@ -161,29 +165,29 @@ is arbitrary, and may change in future versions.
161165
### Controlling the sample sets
162166

163167
By default we get the IBD segments between all pairs of
164-
:ref:`sample<sec_data_model_definitions_samples>` nodes.
168+
{ref}`sample<sec_data_model_definitions_sample>` nodes.
165169

166170
#### IBD within a sample set
167171
We can reduce this to pairs within a specific set using the
168-
``within`` argument::
172+
``within`` argument:
169173

170174

171175
```{eval-rst}
172176
.. todo:: More detail and better examples here.
173177
```
174178

175179
```{code-cell}
176-
segs = ts.ibd_segments(within=[0, 2], store_pairs=True)
177-
print(list(segs.keys()))
180+
segments = ts.ibd_segments(within=[0, 2], store_pairs=True)
181+
print(list(segments.keys()))
178182
```
179183

180184
#### IBD between sample sets
181185

182186
We can also compute IBD **between** sample sets:
183187

184188
```{code-cell}
185-
segs = ts.ibd_segments(between=[[0,1], [2]], store_pairs=True)
186-
print(list(segs.keys()))
189+
segments = ts.ibd_segments(between=[[0,1], [2]], store_pairs=True)
190+
print(list(segments.keys()))
187191
```
188192

189193
:::{seealso}
@@ -193,7 +197,7 @@ more details.
193197

194198
### Constraints on the segments
195199

196-
The ``max_time`` and ``min_length`` arguments allow us to constrain the
200+
The ``max_time`` and ``min_span`` arguments allow us to constrain the
197201
segments that we consider.
198202

199203
```{eval-rst}

python/_tskitmodule.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6811,20 +6811,20 @@ TableCollection_ibd_segments_within(
68116811
PyArrayObject *samples_array = NULL;
68126812
int32_t *samples = NULL;
68136813
tsk_size_t num_samples = 0;
6814-
double min_length = 0;
6814+
double min_span = 0;
68156815
double max_time = DBL_MAX;
68166816
int store_pairs = 0;
68176817
int store_segments = 0;
68186818
npy_intp *shape;
68196819
static char *kwlist[]
6820-
= { "samples", "min_length", "max_time", "store_pairs", "store_segments", NULL };
6820+
= { "samples", "min_span", "max_time", "store_pairs", "store_segments", NULL };
68216821
tsk_flags_t options = 0;
68226822

68236823
if (TableCollection_check_state(self) != 0) {
68246824
goto out;
68256825
}
68266826
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oddii", kwlist, &py_samples,
6827-
&min_length, &max_time, &store_pairs, &store_segments)) {
6827+
&min_span, &max_time, &store_pairs, &store_segments)) {
68286828
goto out;
68296829
}
68306830
if (py_samples != Py_None) {
@@ -6851,7 +6851,7 @@ TableCollection_ibd_segments_within(
68516851
}
68526852

68536853
err = tsk_table_collection_ibd_within(self->tables, result->identity_segments,
6854-
samples, num_samples, min_length, max_time, options);
6854+
samples, num_samples, min_span, max_time, options);
68556855
if (err != 0) {
68566856
handle_library_error(err);
68576857
goto out;
@@ -6876,19 +6876,19 @@ TableCollection_ibd_segments_between(
68766876
PyArrayObject *sample_set_sizes_array = NULL;
68776877
IdentitySegments *result = NULL;
68786878
tsk_size_t num_sample_sets;
6879-
double min_length = 0;
6879+
double min_span = 0;
68806880
double max_time = DBL_MAX;
68816881
int store_pairs = 0;
68826882
int store_segments = 0;
6883-
static char *kwlist[] = { "sample_set_sizes", "sample_sets", "min_length",
6884-
"max_time", "store_pairs", "store_segments", NULL };
6883+
static char *kwlist[] = { "sample_set_sizes", "sample_sets", "min_span", "max_time",
6884+
"store_pairs", "store_segments", NULL };
68856885
tsk_flags_t options = 0;
68866886

68876887
if (TableCollection_check_state(self) != 0) {
68886888
goto out;
68896889
}
68906890
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|ddii", kwlist, &sample_set_sizes,
6891-
&sample_sets, &min_length, &max_time, &store_pairs, &store_segments)) {
6891+
&sample_sets, &min_span, &max_time, &store_pairs, &store_segments)) {
68926892
goto out;
68936893
}
68946894
if (parse_sample_sets(sample_set_sizes, &sample_set_sizes_array, sample_sets,
@@ -6911,7 +6911,7 @@ TableCollection_ibd_segments_between(
69116911

69126912
err = tsk_table_collection_ibd_between(self->tables, result->identity_segments,
69136913
num_sample_sets, (tsk_size_t *) PyArray_DATA(sample_set_sizes_array),
6914-
(tsk_id_t *) PyArray_DATA(sample_sets_array), min_length, max_time, options);
6914+
(tsk_id_t *) PyArray_DATA(sample_sets_array), min_span, max_time, options);
69156915
if (err != 0) {
69166916
handle_library_error(err);
69176917
goto out;

python/tests/ibd.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ class IbdFinder:
144144
Finds all IBD relationships between specified sample pairs in a tree sequence.
145145
"""
146146

147-
def __init__(self, ts, *, within=None, between=None, min_length=0, max_time=None):
147+
def __init__(self, ts, *, within=None, between=None, min_span=0, max_time=None):
148148
self.ts = ts
149149
self.result = IbdResult()
150150
if within is not None and between is not None:
@@ -160,7 +160,7 @@ def __init__(self, ts, *, within=None, between=None, min_length=0, max_time=None
160160
if within is None:
161161
within = ts.samples()
162162
self.sample_set_id[within] = 0
163-
self.min_length = min_length
163+
self.min_span = min_span
164164
self.max_time = np.inf if max_time is None else max_time
165165
self.A = [SegmentList() for _ in range(ts.num_nodes)] # Descendant segments
166166
for u in range(ts.num_nodes):
@@ -170,7 +170,7 @@ def __init__(self, ts, *, within=None, between=None, min_length=0, max_time=None
170170

171171
def print_state(self):
172172
print("IBD Finder")
173-
print("min_length = ", self.min_length)
173+
print("min_span = ", self.min_span)
174174
print("max_time = ", self.max_time)
175175
print("finding_between = ", self.finding_between)
176176
print("u\tset_id\tA = ")
@@ -192,7 +192,7 @@ def run(self):
192192
max(e.left, s.left),
193193
min(e.right, s.right),
194194
)
195-
if intvl[1] - intvl[0] > self.min_length:
195+
if intvl[1] - intvl[0] > self.min_span:
196196
child_segs.append(Segment(intvl[0], intvl[1], s.node))
197197
s = s.next
198198
self.record_ibd(e.parent, child_segs)
@@ -227,7 +227,7 @@ def record_ibd(self, current_parent, child_segs):
227227
def passes_filters(self, a, b, left, right):
228228
if a == b:
229229
return False
230-
if right - left <= self.min_length:
230+
if right - left <= self.min_span:
231231
return False
232232
if self.finding_between:
233233
return self.sample_set_id[a] != self.sample_set_id[b]
@@ -258,7 +258,7 @@ def passes_filters(self, a, b, left, right):
258258
parser.add_argument(
259259
"--min-length",
260260
type=float,
261-
dest="min_length",
261+
dest="min_span",
262262
nargs=1,
263263
metavar="MIN_LENGTH",
264264
help="Only segments longer than this cutoff will be returned.",
@@ -284,16 +284,16 @@ def passes_filters(self, a, b, left, right):
284284

285285
args = parser.parse_args()
286286
ts = tskit.load(args.infile[0])
287-
if args.min_length is None:
288-
min_length = 0
287+
if args.min_span is None:
288+
min_span = 0
289289
else:
290-
min_length = args.min_length[0]
290+
min_span = args.min_span[0]
291291
if args.max_time is None:
292292
max_time = None
293293
else:
294294
max_time = args.max_time[0]
295295

296-
s = IbdFinder(ts, samples=ts.samples(), min_length=min_length, max_time=max_time)
296+
s = IbdFinder(ts, samples=ts.samples(), min_span=min_span, max_time=max_time)
297297
all_segs = s.find_ibd_segments()
298298

299299
if args.samples is None:

0 commit comments

Comments
 (0)