Skip to content

Commit eba8bdb

Browse files
committed
Make the vcf classes (VariantCalls, Mutation, MutationSet) immutable so they can safely be used as dict keys and set members.
1 parent 644cbdb commit eba8bdb

File tree

2 files changed

+144
-62
lines changed

2 files changed

+144
-62
lines changed

pyvdrm/tests/test_vcf.py

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,12 @@ def test_equal_with_wildtype_mismatch(self):
7777
if set1 == set2:
7878
pass
7979

80+
def test_immutable(self):
81+
m = Mutation('Q1A')
82+
83+
with self.assertRaises(AttributeError):
84+
m.pos = 2
85+
8086

8187
class TestMutationSet(unittest.TestCase):
8288
def test_init_text(self):
@@ -251,6 +257,12 @@ def test_no_wildtype(self):
251257
mutations = MutationSet(text)
252258
self.assertIsNone(mutations.wildtype)
253259

260+
def test_immutable(self):
261+
ms = MutationSet('Q80KR')
262+
263+
with self.assertRaises(AttributeError):
264+
ms.wildtype = 'D'
265+
254266

255267
class TestVariantCalls(unittest.TestCase):
256268
def test_init_text(self):
@@ -264,22 +276,21 @@ def test_init_text(self):
264276
def test_init_single_sequence(self):
265277
reference = 'ACHE'
266278
sample = 'ICRE'
267-
expected_mutation_sets = {MutationSet('A1I'), MutationSet('H3R')}
279+
expected_calls = VariantCalls('A1I C2C H3R E4E')
268280

269281
calls = VariantCalls(reference=reference, sample=sample)
270282

271283
self.assertEqual(reference, calls.reference)
272-
self.assertEqual(expected_mutation_sets, calls.mutation_sets)
284+
self.assertEqual(expected_calls, calls)
273285

274286
def test_init_multiple_sequences(self):
275287
reference = 'ACHE'
276288
sample = ['IN', 'C', 'HR', 'E']
277-
expected_mutation_sets = {MutationSet('A1IN'), MutationSet('H3HR')}
289+
expected_calls = VariantCalls('A1IN C2C H3HR E4E')
278290

279291
calls = VariantCalls(reference=reference, sample=sample)
280292

281-
self.assertEqual(reference, calls.reference)
282-
self.assertEqual(expected_mutation_sets, calls.mutation_sets)
293+
self.assertEqual(expected_calls, calls)
283294

284295
def test_init_bad_length(self):
285296
reference = 'ACHE'
@@ -289,6 +300,16 @@ def test_init_bad_length(self):
289300
ValueError, r'Reference length was 4 and sample length was 5\.'):
290301
VariantCalls(reference=reference, sample=sample)
291302

303+
def test_init_with_reference(self):
304+
expected_reference = 'ASH'
305+
expected_repr = "VariantCalls('A1IL H3R')"
306+
307+
calls = VariantCalls('1IL 3R', reference=expected_reference)
308+
r = repr(calls)
309+
310+
self.assertEqual(expected_reference, calls.reference)
311+
self.assertEqual(expected_repr, r)
312+
292313
def test_repr(self):
293314
expected_repr = "VariantCalls('A1IL H3R')"
294315
calls = VariantCalls('A1IL H3R')
@@ -325,6 +346,28 @@ def test_hash(self):
325346
self.assertNotEqual(hash1, hash3)
326347
self.assertNotEqual(hash1, hash4)
327348

349+
def test_iter(self):
350+
calls = VariantCalls('A1IL H3R')
351+
expected_mutation_sets = {MutationSet('A1IL'), MutationSet('H3R')}
352+
353+
mutation_sets = set(calls)
354+
355+
self.assertEqual(expected_mutation_sets, mutation_sets)
356+
357+
def test_in(self):
358+
calls = VariantCalls('A1IL H3R')
359+
mutation_set1 = MutationSet('H3R')
360+
mutation_set2 = MutationSet('H4R')
361+
362+
self.assertIn(mutation_set1, calls)
363+
self.assertNotIn(mutation_set2, calls)
364+
365+
def test_immutable(self):
366+
calls = VariantCalls('A1IL H3R')
367+
368+
with self.assertRaises(AttributeError):
369+
calls.reference = 'ASH'
370+
328371

329372
if __name__ == '__main__':
330373
unittest.main()

pyvdrm/vcf.py

Lines changed: 96 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2,32 +2,45 @@
22
Classes for dealing with amino acid mutation sets
33
"""
44
import re
5+
from collections import namedtuple
56
from operator import attrgetter
67

78

8-
class VariantCalls:
9+
class VariantCalls(namedtuple('VariantCalls', 'mutation_sets reference')):
10+
# TODO: remove all these __init__ methods once PyCharm bug is fixed.
11+
# https://youtrack.jetbrains.com/issue/PY-26834
12+
# noinspection PyUnusedLocal
913
def __init__(self, text=None, reference=None, sample=None):
1014
""" Construct a set of Mutations given two aligned amino acid sequences
1115
1216
:param str reference: the wild-type reference
1317
:param sample: amino acids present at each position, either a string or
1418
a list of strings
1519
"""
16-
self.reference = reference
20+
# noinspection PyArgumentList
21+
super().__init__()
22+
23+
def __new__(cls, text=None, reference=None, sample=None):
1724
if text is not None:
1825
terms = text.split()
19-
self.mutation_sets = frozenset(MutationSet(term) for term in terms)
26+
mutation_sets = frozenset(
27+
MutationSet(term, reference=reference)
28+
for term in terms)
2029
else:
2130
if len(reference) != len(sample):
2231
raise ValueError(
2332
'Reference length was {} and sample length was {}.'.format(
2433
len(reference),
2534
len(sample)))
2635

27-
self.mutation_sets = {MutationSet(pos=i, variants=alt, wildtype=ref)
28-
for i, (alt, ref) in enumerate(zip(sample, reference),
29-
1)
30-
if ref != alt}
36+
mutation_sets = {MutationSet(pos=i, variants=alt, wildtype=ref)
37+
for i, (alt, ref) in enumerate(zip(sample,
38+
reference),
39+
1)}
40+
# noinspection PyArgumentList
41+
return super().__new__(cls,
42+
mutation_sets=mutation_sets,
43+
reference=reference)
3144

3245
def __str__(self):
3346
return ' '.join(map(str, sorted(self.mutation_sets,
@@ -40,23 +53,23 @@ def __repr__(self):
4053
def __eq__(self, other):
4154
return self.mutation_sets == other.mutation_sets
4255

56+
def __ne__(self, other):
57+
return not self.__eq__(other)
58+
4359
def __hash__(self):
4460
return hash(self.mutation_sets)
4561

62+
def __iter__(self):
63+
return iter(self.mutation_sets)
4664

47-
class Mutation(object):
48-
"""Mutation has optional wildtype, position, and call"""
65+
def __contains__(self, item):
66+
return item in self.mutation_sets
4967

50-
def __init__(self, text=None, wildtype=None, pos=None, variant=None):
51-
""" Initialize.
5268

53-
:param str text: will be parsed for wildtype (optional), position,
54-
and variant
55-
:param str wildtype: amino acid abbreviation for wild type
56-
:param str|int pos: position
57-
:param str variant: single amino acid abbreviation, or 'i' for
58-
insertion, or 'd' for deletion
59-
"""
69+
class Mutation(namedtuple('Mutation', 'pos variant wildtype')):
70+
"""Mutation has optional wildtype, position, and call"""
71+
72+
def __new__(cls, text=None, wildtype=None, pos=None, variant=None):
6073
if text is not None:
6174
match = re.match(r"([A-Z]?)(\d+)([idA-Z])", text)
6275
if match is None:
@@ -68,13 +81,25 @@ def __init__(self, text=None, wildtype=None, pos=None, variant=None):
6881
raise ValueError('Mutation text only allows one variant.')
6982

7083
wildtype, pos, variant = match.groups()
71-
self.wildtype = wildtype or None
72-
self.pos = int(pos)
73-
self.variant = variant
84+
# noinspection PyArgumentList
85+
return super().__new__(cls,
86+
pos=int(pos),
87+
variant=variant,
88+
wildtype=wildtype or None)
7489

75-
def extract_wildtype(self, seq):
76-
"""I really don't like this; please don't actually use this"""
77-
self.wildtype = seq[self.pos - 1]
90+
# noinspection PyUnusedLocal
91+
def __init__(self, text=None, wildtype=None, pos=None, variant=None):
92+
""" Initialize.
93+
94+
:param str text: will be parsed for wildtype (optional), position,
95+
and variant
96+
:param str wildtype: amino acid abbreviation for wild type
97+
:param str|int pos: position
98+
:param str variant: single amino acid abbreviation, or 'i' for
99+
insertion, or 'd' for deletion
100+
"""
101+
# noinspection PyArgumentList
102+
super().__init__()
78103

79104
def __repr__(self):
80105
text = str(self)
@@ -100,36 +125,31 @@ def __eq__(self, other):
100125
# now that we agree on the wt and position
101126
return (self.pos, self.variant) == (other.pos, other.variant)
102127

128+
def __ne__(self, other):
129+
return not self.__eq__(other)
130+
103131
def __hash__(self):
104132
return hash((self.pos, self.variant))
105133

106134

107-
class MutationSet(object):
135+
class MutationSet(namedtuple('MutationSet', 'pos mutations wildtype')):
108136
"""Handle sets of mutations at a position"""
109137

110-
def __init__(self,
111-
text=None,
112-
wildtype=None,
113-
pos=None,
114-
variants=None,
115-
mutations=None):
116-
""" Initialize
117-
118-
:param str text: will be parsed for wildtype (optional), position,
119-
and variants
120-
:param str wildtype: amino acid abbreviation for wild type
121-
:param int|str pos: position
122-
:param str variants: zero or more amino acid abbreviations, or 'i' for
123-
insertion, or 'd' for deletion
124-
:param mutations: a sequence of Mutation objects, with matching
125-
positions and wild types
126-
"""
138+
def __new__(cls,
139+
text=None,
140+
wildtype=None,
141+
pos=None,
142+
variants=None,
143+
mutations=None,
144+
reference=None):
127145
if text:
128146
match = re.match(r"([A-Z]?)(\d+)([idA-Z]*)", text)
129147
if match is None:
130148
raise ValueError
131149

132150
wildtype, pos, variants = match.groups()
151+
if reference:
152+
wildtype = reference[int(pos)-1]
133153

134154
if variants:
135155
mutations = frozenset(Mutation(wildtype=wildtype,
@@ -159,18 +179,41 @@ def __init__(self,
159179
pos = positions.pop()
160180
if wildtypes:
161181
wildtype = wildtypes.pop()
162-
self.wildtype = wildtype or None
163-
self.pos = int(pos)
164-
self.mutations = mutations
182+
# noinspection PyArgumentList
183+
return super().__new__(cls,
184+
wildtype=wildtype or None,
185+
pos=int(pos),
186+
mutations=mutations)
187+
188+
# noinspection PyUnusedLocal
189+
def __init__(self,
190+
text=None,
191+
wildtype=None,
192+
pos=None,
193+
variants=None,
194+
mutations=None,
195+
reference=None):
196+
""" Initialize
197+
198+
:param str text: will be parsed for wildtype (optional), position,
199+
and variants
200+
:param str wildtype: amino acid abbreviation for wild type
201+
:param int|str pos: position
202+
:param str variants: zero or more amino acid abbreviations, or 'i' for
203+
insertion, or 'd' for deletion
204+
:param mutations: a sequence of Mutation objects, with matching
205+
positions and wild types
206+
:param str reference: optional reference sequence; when given, wildtype
207+
is read from reference[pos - 1] (pos is 1-based)
208+
"""
209+
# noinspection PyArgumentList
210+
super().__init__()
165211

166212
def __len__(self):
167213
return len(self.mutations)
168214

169215
def __contains__(self, call):
170-
for mutation in self.mutations:
171-
if call == mutation:
172-
return True
173-
return False
216+
return call in self.mutations
174217

175218
def __eq__(self, other):
176219
if self.pos != other.pos:
@@ -184,18 +227,14 @@ def __eq__(self, other):
184227
raise ValueError(message)
185228
return self.mutations == other.mutations
186229

230+
def __ne__(self, other):
231+
return not self.__eq__(other)
232+
187233
def __hash__(self):
188234
return hash((self.pos, self.mutations))
189235

190236
def __iter__(self):
191-
self._mu_iter = list(self.mutations).__iter__()
192-
return self._mu_iter
193-
194-
def __next__(self):
195-
return self._mu_iter.__next__()
196-
197-
def __reversed__(self):
198-
return list(self.mutations).__reversed__()
237+
return iter(self.mutations)
199238

200239
def __str__(self):
201240
text = self.wildtype or ''

0 commit comments

Comments
 (0)