@@ -101,25 +101,105 @@ def genomic_relationship(
101
101
Examples
102
102
--------
103
103
104
+ Diploid dataset without missing data:
105
+
104
106
>>> import sgkit as sg
105
107
>>> ds = sg.simulate_genotype_call_dataset(n_variant=6, n_sample=3, seed=0)
106
108
>>> ds = sg.count_call_alleles(ds)
107
109
>>> # use reference allele count as dosage
108
110
>>> ds["call_dosage"] = ds.call_allele_count[:,:,0]
109
111
>>> ds.call_dosage.values # doctest: +NORMALIZE_WHITESPACE
110
112
array([[2, 1, 1],
111
- [1, 1, 1],
112
- [2, 1, 0],
113
- [2, 1, 1],
114
- [1, 0, 0],
115
- [1, 1, 2]], dtype=uint8)
113
+ [1, 1, 1],
114
+ [2, 1, 0],
115
+ [2, 1, 1],
116
+ [1, 0, 0],
117
+ [1, 1, 2]], dtype=uint8)
116
118
>>> # use sample population frequency as ancestral frequency
117
119
>>> ds["sample_frequency"] = ds.call_dosage.mean(dim="samples") / ds.dims["ploidy"]
118
120
>>> ds = sg.genomic_relationship(ds, ancestral_frequency="sample_frequency")
119
121
>>> ds.stat_genomic_relationship.values # doctest: +NORMALIZE_WHITESPACE
120
122
array([[ 0.93617021, -0.21276596, -0.72340426],
121
- [-0.21276596, 0.17021277, 0.04255319],
122
- [-0.72340426, 0.04255319, 0.68085106]])
123
+ [-0.21276596, 0.17021277, 0.04255319],
124
+ [-0.72340426, 0.04255319, 0.68085106]])
125
+
126
+ Skipping partial or missing genotype calls:
127
+
128
+ >>> import numpy as np
+ >>> import sgkit as sg
129
+ >>> import xarray as xr
130
+ >>> ds = sg.simulate_genotype_call_dataset(
131
+ ... n_variant=6,
132
+ ... n_sample=4,
133
+ ... missing_pct=0.05,
134
+ ... seed=0,
135
+ ... )
136
+ >>> ds = sg.count_call_alleles(ds)
137
+ >>> ds["call_dosage"] = xr.where(
138
+ ... ds.call_genotype_mask.any(dim="ploidy"),
139
+ ... np.nan,
140
+ ... ds.call_allele_count[:,:,1], # alternate allele
141
+ ... )
142
+ >>> ds.call_dosage.values # doctest: +NORMALIZE_WHITESPACE
143
+ array([[ 0., 1., 1., 1.],
144
+ [ 1., nan, 0., 1.],
145
+ [ 2., 0., 1., 1.],
146
+ [ 1., 2., nan, 1.],
147
+ [ 1., 0., 1., 2.],
148
+ [ 2., 2., 0., 0.]])
149
+ >>> ds["sample_frequency"] = ds.call_dosage.mean(
150
+ ... dim="samples", skipna=True
151
+ ... ) / ds.dims["ploidy"]
152
+ >>> ds = sg.genomic_relationship(
153
+ ... ds, ancestral_frequency="sample_frequency", skipna=True
154
+ ... )
155
+ >>> ds.stat_genomic_relationship.values # doctest: +NORMALIZE_WHITESPACE
156
+ array([[ 0.9744836 , -0.16978417, -0.58417266, -0.33778858],
157
+ [-0.16978417, 1.45323741, -0.47619048, -0.89496403],
158
+ [-0.58417266, -0.47619048, 0.62446043, 0.34820144],
159
+ [-0.33778858, -0.89496403, 0.34820144, 0.79951397]])
160
+
161
+ Using mean imputation to replace missing genotype calls:
162
+
163
+ >>> import numpy as np
+ >>> import sgkit as sg
164
+ >>> import xarray as xr
165
+ >>> ds = sg.simulate_genotype_call_dataset(
166
+ ... n_variant=6,
167
+ ... n_sample=4,
168
+ ... missing_pct=0.05,
169
+ ... seed=0,
170
+ ... )
171
+ >>> ds = sg.count_call_alleles(ds)
172
+ >>> ds["call_dosage"] = xr.where(
173
+ ... ds.call_genotype_mask.any(dim="ploidy"),
174
+ ... np.nan,
175
+ ... ds.call_allele_count[:,:,1], # alternate allele
176
+ ... )
177
+ >>> # use mean imputation to replace missing dosage
178
+ >>> ds["call_dosage_imputed"] = xr.where(
179
+ ... ds.call_genotype_mask.any(dim="ploidy"),
180
+ ... ds.call_dosage.mean(dim="samples", skipna=True),
181
+ ... ds.call_dosage,
182
+ ... )
183
+ >>> ds.call_dosage_imputed.values # doctest: +NORMALIZE_WHITESPACE
184
+ array([[0. , 1. , 1. , 1. ],
185
+ [1. , 0.66666667, 0. , 1. ],
186
+ [2. , 0. , 1. , 1. ],
187
+ [1. , 2. , 1.33333333, 1. ],
188
+ [1. , 0. , 1. , 2. ],
189
+ [2. , 2. , 0. , 0. ]])
190
+ >>> ds["sample_frequency"] = ds.call_dosage.mean(
191
+ ... dim="samples", skipna=True
192
+ ... ) / ds.dims["ploidy"]
193
+ >>> ds = sg.genomic_relationship(
194
+ ... ds,
195
+ ... call_dosage="call_dosage_imputed",
196
+ ... ancestral_frequency="sample_frequency",
197
+ ... )
198
+ >>> ds.stat_genomic_relationship.values # doctest: +NORMALIZE_WHITESPACE
199
+ array([[ 0.9744836 , -0.14337789, -0.49331713, -0.33778858],
200
+ [-0.14337789, 1.2272175 , -0.32806804, -0.75577157],
201
+ [-0.49331713, -0.32806804, 0.527339 , 0.29404617],
202
+ [-0.33778858, -0.75577157, 0.29404617, 0.79951397]])
123
203
124
204
References
125
205
----------
0 commit comments