@@ -101,25 +101,105 @@ def genomic_relationship(
101101 Examples
102102 --------
103103
104+ Diploid dataset without missing data:
105+
104106 >>> import sgkit as sg
105107 >>> ds = sg.simulate_genotype_call_dataset(n_variant=6, n_sample=3, seed=0)
106108 >>> ds = sg.count_call_alleles(ds)
107109 >>> # use reference allele count as dosage
108110 >>> ds["call_dosage"] = ds.call_allele_count[:,:,0]
109111 >>> ds.call_dosage.values # doctest: +NORMALIZE_WHITESPACE
110112 array([[2, 1, 1],
111- [1, 1, 1],
112- [2, 1, 0],
113- [2, 1, 1],
114- [1, 0, 0],
115- [1, 1, 2]], dtype=uint8)
113+ [1, 1, 1],
114+ [2, 1, 0],
115+ [2, 1, 1],
116+ [1, 0, 0],
117+ [1, 1, 2]], dtype=uint8)
116118 >>> # use sample population frequency as ancestral frequency
117119 >>> ds["sample_frequency"] = ds.call_dosage.mean(dim="samples") / ds.sizes["ploidy"]
118120 >>> ds = sg.genomic_relationship(ds, ancestral_frequency="sample_frequency")
119121 >>> ds.stat_genomic_relationship.values # doctest: +NORMALIZE_WHITESPACE
120122 array([[ 0.93617021, -0.21276596, -0.72340426],
121- [-0.21276596, 0.17021277, 0.04255319],
122- [-0.72340426, 0.04255319, 0.68085106]])
123+ [-0.21276596, 0.17021277, 0.04255319],
124+ [-0.72340426, 0.04255319, 0.68085106]])
125+
126+ Skipping partial or missing genotype calls:
127+
128+ >>> import sgkit as sg
129+ >>> import xarray as xr
130+ >>> ds = sg.simulate_genotype_call_dataset(
131+ ... n_variant=6,
132+ ... n_sample=4,
133+ ... missing_pct=0.05,
134+ ... seed=0,
135+ ... )
136+ >>> ds = sg.count_call_alleles(ds)
137+ >>> ds["call_dosage"] = xr.where(
138+ ... ds.call_genotype_mask.any(dim="ploidy"),
139+ ... float("nan"),
140+ ... ds.call_allele_count[:,:,1], # alternate allele
141+ ... )
142+ >>> ds.call_dosage.values # doctest: +NORMALIZE_WHITESPACE
143+ array([[ 0., 1., 1., 1.],
144+ [ 1., nan, 0., 1.],
145+ [ 2., 0., 1., 1.],
146+ [ 1., 2., nan, 1.],
147+ [ 1., 0., 1., 2.],
148+ [ 2., 2., 0., 0.]])
149+ >>> ds["sample_frequency"] = ds.call_dosage.mean(
150+ ... dim="samples", skipna=True
151+ ... ) / ds.sizes["ploidy"]
152+ >>> ds = sg.genomic_relationship(
153+ ... ds, ancestral_frequency="sample_frequency", skipna=True
154+ ... )
155+ >>> ds.stat_genomic_relationship.values # doctest: +NORMALIZE_WHITESPACE
156+ array([[ 0.9744836 , -0.16978417, -0.58417266, -0.33778858],
157+ [-0.16978417, 1.45323741, -0.47619048, -0.89496403],
158+ [-0.58417266, -0.47619048, 0.62446043, 0.34820144],
159+ [-0.33778858, -0.89496403, 0.34820144, 0.79951397]])
160+
161+ Using mean imputation to replace missing genotype calls:
162+
163+ >>> import sgkit as sg
164+ >>> import xarray as xr
165+ >>> ds = sg.simulate_genotype_call_dataset(
166+ ... n_variant=6,
167+ ... n_sample=4,
168+ ... missing_pct=0.05,
169+ ... seed=0,
170+ ... )
171+ >>> ds = sg.count_call_alleles(ds)
172+ >>> ds["call_dosage"] = xr.where(
173+ ... ds.call_genotype_mask.any(dim="ploidy"),
174+ ... float("nan"),
175+ ... ds.call_allele_count[:,:,1], # alternate allele
176+ ... )
177+ >>> # use mean imputation to replace missing dosage
178+ >>> ds["call_dosage_imputed"] = xr.where(
179+ ... ds.call_genotype_mask.any(dim="ploidy"),
180+ ... ds.call_dosage.mean(dim="samples", skipna=True),
181+ ... ds.call_dosage,
182+ ... )
183+ >>> ds.call_dosage_imputed.values # doctest: +NORMALIZE_WHITESPACE
184+ array([[0. , 1. , 1. , 1. ],
185+ [1. , 0.66666667, 0. , 1. ],
186+ [2. , 0. , 1. , 1. ],
187+ [1. , 2. , 1.33333333, 1. ],
188+ [1. , 0. , 1. , 2. ],
189+ [2. , 2. , 0. , 0. ]])
190+ >>> ds["sample_frequency"] = ds.call_dosage.mean(
191+ ... dim="samples", skipna=True
192+ ... ) / ds.sizes["ploidy"]
193+ >>> ds = sg.genomic_relationship(
194+ ... ds,
195+ ... call_dosage="call_dosage_imputed",
196+ ... ancestral_frequency="sample_frequency",
197+ ... )
198+ >>> ds.stat_genomic_relationship.values # doctest: +NORMALIZE_WHITESPACE
199+ array([[ 0.9744836 , -0.14337789, -0.49331713, -0.33778858],
200+ [-0.14337789, 1.2272175 , -0.32806804, -0.75577157],
201+ [-0.49331713, -0.32806804, 0.527339 , 0.29404617],
202+ [-0.33778858, -0.75577157, 0.29404617, 0.79951397]])
123203
124204 References
125205 ----------
0 commit comments