@@ -118,6 +118,80 @@ def test_genomic_relationship__VanRaden_AGHmatrix_tetraploid(chunks):
118
118
np .testing .assert_array_almost_equal (actual , expect )
119
119
120
120
121
def test_genomic_relationship__VanRaden_skipna():
    """Check the ``skipna`` option of the VanRaden GRM estimator.

    With ``skipna=True`` the relationship between each pair of
    individuals should be calculated using only the variants where
    neither sample has missing data in ``call_dosage``.

    This should be equivalent to calculating the GRM (with
    ``skipna=False``) on multiple subsets of the variants and, for each
    pair, using the value from the largest subset of variants that
    doesn't result in a nan value.
    """
    nan = np.nan
    # Rows are variants, columns are samples. Only the samples in
    # columns 4 and 7 contain missing values.
    dosage = np.array(
        [
            [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 0.0],
            [1.0, 1.0, 1.0, 2.0, nan, 1.0, 1.0, 0.0, 1.0, 2.0],
            [2.0, 2.0, 0.0, 0.0, nan, 1.0, 1.0, 1.0, 0.0, 1.0],
            [1.0, 0.0, 0.0, 0.0, nan, 1.0, 1.0, 1.0, 1.0, 0.0],
            [1.0, 0.0, 1.0, 1.0, nan, 2.0, 0.0, 1.0, 0.0, 2.0],
            [2.0, 1.0, 1.0, 1.0, nan, 1.0, 2.0, nan, 0.0, 1.0],
            [2.0, 0.0, 1.0, 1.0, nan, 2.0, 1.0, nan, 1.0, 1.0],
            [1.0, 1.0, 1.0, 2.0, nan, 1.0, 2.0, nan, 1.0, 0.0],
            [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, nan, 1.0, 1.0],
            [2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, nan, 2.0, 1.0],
            [1.0, 2.0, 2.0, 1.0, 2.0, 0.0, 1.0, nan, 1.0, 2.0],
            [0.0, 0.0, 1.0, 2.0, 0.0, 1.0, 0.0, nan, 1.0, 2.0],
            [1.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, nan, 1.0, 0.0],
            [0.0, 2.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0],
            [1.0, 1.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0],
            [2.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0],
            [1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 1.0],
            [2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0],
            [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0],
            [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]
    )
    ds = xr.Dataset()
    ds["call_dosage"] = ["variants", "samples"], dosage
    ds["ancestral_frequency"] = "variants", np.ones(len(dosage)) / 2

    def _grm(dataset, skipna):
        # Single place for the estimator settings shared by every call
        # in this test; only the dataset and the skipna flag vary.
        return sg.genomic_relationship(
            dataset,
            call_dosage="call_dosage",
            ancestral_frequency="ancestral_frequency",
            estimator="VanRaden",
            ploidy=2,
            skipna=skipna,
        ).stat_genomic_relationship.values

    # calculating without skipna will result in nans in the GRM
    expect = _grm(ds, skipna=False)
    assert np.isnan(expect).any()

    # fill nan values using maximum subsets without missing data:
    # variants complete for sample 4, variants complete for sample 7,
    # and variants complete for both (needed for the 4-7 pair itself).
    idx_0 = ~np.isnan(dosage[:, 4])
    idx_1 = ~np.isnan(dosage[:, 7])
    idx_2 = np.logical_and(idx_0, idx_1)
    for idx in [idx_0, idx_1, idx_2]:
        sub_expect = _grm(ds.sel(dict(variants=idx)), skipna=False)
        # Only entries that are still nan are replaced, so values taken
        # from an earlier (larger) subset are never overwritten.
        expect = np.where(np.isnan(expect), sub_expect, expect)

    # Guard the reference itself: numpy's array assertions treat nans in
    # matching positions as equal, so a partially filled reference would
    # silently weaken the comparison below.
    assert not np.isnan(expect).any()

    # calculate actual value using skipna=True
    actual = _grm(ds, skipna=True)
    # The two results are computed through different code paths, so
    # compare with a tolerance rather than requiring bitwise equality.
    np.testing.assert_array_almost_equal(actual, expect)
193
+
194
+
121
195
@pytest .mark .parametrize ("ploidy" , [2 , 4 ])
122
196
def test_genomic_relationship__detect_ploidy (ploidy ):
123
197
ds = xr .Dataset ()
0 commit comments