2
2
Classes for dealing with amino acid mutation sets
3
3
"""
4
4
import re
5
+ from collections import namedtuple
5
6
from operator import attrgetter
6
7
7
8
8
- class VariantCalls :
9
+ class VariantCalls (namedtuple ('VariantCalls' , 'mutation_sets reference' )):
10
+ # TODO: remove all these __init__ methods once PyCharm bug is fixed.
11
+ # https://youtrack.jetbrains.com/issue/PY-26834
12
+ # noinspection PyUnusedLocal
9
13
def __init__(self, text=None, reference=None, sample=None):
    """ Accept the constructor arguments; the tuple fields are built in
    __new__, so nothing is stored here.

    :param str text: whitespace-separated mutation set terms; when given,
        it is used instead of comparing sample to reference
    :param str reference: the wild-type reference
    :param sample: amino acids present at each position, either a string or
        a list of strings
    """
    # noinspection PyArgumentList
    super().__init__()
22
+
23
def __new__(cls, text=None, reference=None, sample=None):
    """ Build the (mutation_sets, reference) tuple.

    When text is given it is split on whitespace and each term becomes a
    MutationSet. Otherwise sample is compared to reference position by
    position (1-based), producing one MutationSet for every position.

    :param str text: whitespace-separated mutation set terms
    :param str reference: the wild-type reference
    :param sample: amino acids present at each position, either a string or
        a list of strings
    :raises ValueError: if text is None and reference and sample differ in
        length
    """
    if text is not None:
        mutation_sets = frozenset(
            MutationSet(term, reference=reference)
            for term in text.split())
    else:
        if len(reference) != len(sample):
            raise ValueError(
                'Reference length was {} and sample length was {}.'.format(
                    len(reference),
                    len(sample)))

        # frozenset rather than set: __hash__ hashes mutation_sets, and a
        # plain set is unhashable. This also matches the text branch.
        mutation_sets = frozenset(
            MutationSet(pos=i, variants=alt, wildtype=ref)
            for i, (alt, ref) in enumerate(zip(sample, reference), 1))
    # noinspection PyArgumentList
    return super().__new__(cls,
                           mutation_sets=mutation_sets,
                           reference=reference)
31
44
32
45
def __str__ (self ):
33
46
return ' ' .join (map (str , sorted (self .mutation_sets ,
@@ -40,23 +53,23 @@ def __repr__(self):
40
53
def __eq__(self, other):
    """ Compare by mutation sets only; reference is not compared. """
    mine = self.mutation_sets
    theirs = other.mutation_sets
    return mine == theirs
42
55
56
def __ne__(self, other):
    """ Return the negation of __eq__ so that != agrees with ==. """
    # Presumably needed because the tuple base supplies its own __ne__.
    is_equal = self.__eq__(other)
    return not is_equal
58
+
43
59
def __hash__(self):
    """ Hash on mutation_sets only, the same field __eq__ compares. """
    members = self.mutation_sets
    return hash(members)
45
61
62
def __iter__(self):
    """ Iterate over the mutation sets instead of the tuple's fields. """
    members = self.mutation_sets
    return iter(members)
46
64
47
- class Mutation ( object ):
48
- """Mutation has optional wildtype, position, and call"""
65
def __contains__(self, item):
    """ Report whether item is one of the mutation sets. """
    members = self.mutation_sets
    return item in members
49
67
50
- def __init__ (self , text = None , wildtype = None , pos = None , variant = None ):
51
- """ Initialize.
52
68
53
- :param str text: will be parsed for wildtype (optional), position,
54
- and variant
55
- :param str wildtype: amino acid abbreviation for wild type
56
- :param str|int pos: position
57
- :param str variant: single amino acid abbreviation, or 'i' for
58
- insertion, or 'd' for deletion
59
- """
69
+ class Mutation (namedtuple ('Mutation' , 'pos variant wildtype' )):
70
+ """Mutation has optional wildtype, position, and call"""
71
+
72
+ def __new__ (cls , text = None , wildtype = None , pos = None , variant = None ):
60
73
if text is not None :
61
74
match = re .match (r"([A-Z]?)(\d+)([idA-Z])" , text )
62
75
if match is None :
@@ -68,13 +81,25 @@ def __init__(self, text=None, wildtype=None, pos=None, variant=None):
68
81
raise ValueError ('Mutation text only allows one variant.' )
69
82
70
83
wildtype , pos , variant = match .groups ()
71
- self .wildtype = wildtype or None
72
- self .pos = int (pos )
73
- self .variant = variant
84
+ # noinspection PyArgumentList
85
+ return super ().__new__ (cls ,
86
+ pos = int (pos ),
87
+ variant = variant ,
88
+ wildtype = wildtype or None )
74
89
75
- def extract_wildtype (self , seq ):
76
- """I really don't like this; please don't actually use this"""
77
- self .wildtype = seq [self .pos - 1 ]
90
+ # noinspection PyUnusedLocal
91
def __init__(self, text=None, wildtype=None, pos=None, variant=None):
    """ Accept the constructor arguments; the tuple fields are set in
    __new__, so nothing is stored here.

    :param str text: will be parsed for wildtype (optional), position,
        and variant
    :param str wildtype: amino acid abbreviation for wild type
    :param str|int pos: position
    :param str variant: single amino acid abbreviation, or 'i' for
        insertion, or 'd' for deletion
    """
    # noinspection PyArgumentList
    super().__init__()
78
103
79
104
def __repr__ (self ):
80
105
text = str (self )
@@ -100,36 +125,31 @@ def __eq__(self, other):
100
125
# now that we agree on the wt and position
101
126
return (self .pos , self .variant ) == (other .pos , other .variant )
102
127
128
def __ne__(self, other):
    """ Return the negation of __eq__ so that != agrees with ==. """
    is_equal = self.__eq__(other)
    return not is_equal
130
+
103
131
def __hash__(self):
    """ Hash on position and variant; wildtype is left out of the hash. """
    key = (self.pos, self.variant)
    return hash(key)
105
133
106
134
107
- class MutationSet (object ):
135
+ class MutationSet (namedtuple ( 'MutationSet' , 'pos mutations wildtype' ) ):
108
136
"""Handle sets of mutations at a position"""
109
137
110
- def __init__ (self ,
111
- text = None ,
112
- wildtype = None ,
113
- pos = None ,
114
- variants = None ,
115
- mutations = None ):
116
- """ Initialize
117
-
118
- :param str text: will be parsed for wildtype (optional), position,
119
- and variants
120
- :param str wildtype: amino acid abbreviation for wild type
121
- :param int|str pos: position
122
- :param str variants: zero or more amino acid abbreviations, or 'i' for
123
- insertion, or 'd' for deletion
124
- :param mutations: a sequence of Mutation objects, with matching
125
- positions and wild types
126
- """
138
+ def __new__ (cls ,
139
+ text = None ,
140
+ wildtype = None ,
141
+ pos = None ,
142
+ variants = None ,
143
+ mutations = None ,
144
+ reference = None ):
127
145
if text :
128
146
match = re .match (r"([A-Z]?)(\d+)([idA-Z]*)" , text )
129
147
if match is None :
130
148
raise ValueError
131
149
132
150
wildtype , pos , variants = match .groups ()
151
+ if reference :
152
+ wildtype = reference [int (pos )- 1 ]
133
153
134
154
if variants :
135
155
mutations = frozenset (Mutation (wildtype = wildtype ,
@@ -159,18 +179,41 @@ def __init__(self,
159
179
pos = positions .pop ()
160
180
if wildtypes :
161
181
wildtype = wildtypes .pop ()
162
- self .wildtype = wildtype or None
163
- self .pos = int (pos )
164
- self .mutations = mutations
182
+ # noinspection PyArgumentList
183
+ return super ().__new__ (cls ,
184
+ wildtype = wildtype or None ,
185
+ pos = int (pos ),
186
+ mutations = mutations )
187
+
188
+ # noinspection PyUnusedLocal
189
def __init__(self,
             text=None,
             wildtype=None,
             pos=None,
             variants=None,
             mutations=None,
             reference=None):
    """ Accept the constructor arguments; the tuple fields are set in
    __new__, so nothing is stored here.

    :param str text: will be parsed for wildtype (optional), position,
        and variants
    :param str wildtype: amino acid abbreviation for wild type
    :param int|str pos: position
    :param str variants: zero or more amino acid abbreviations, or 'i' for
        insertion, or 'd' for deletion
    :param mutations: a sequence of Mutation objects, with matching
        positions and wild types
    :param str reference: alternative source for wildtype, based on
        pos - 1
    """
    # noinspection PyArgumentList
    super().__init__()
165
211
166
212
def __len__(self):
    """ Return the number of mutations held, not the tuple's field count. """
    members = self.mutations
    return len(members)
168
214
169
215
def __contains__(self, call):
    """ Report whether call is one of the mutations in this set. """
    members = self.mutations
    return call in members
174
217
175
218
def __eq__ (self , other ):
176
219
if self .pos != other .pos :
@@ -184,18 +227,14 @@ def __eq__(self, other):
184
227
raise ValueError (message )
185
228
return self .mutations == other .mutations
186
229
230
def __ne__(self, other):
    """ Return the negation of __eq__ so that != agrees with ==. """
    is_equal = self.__eq__(other)
    return not is_equal
232
+
187
233
def __hash__(self):
    """ Hash on position and mutations; wildtype is left out of the hash. """
    key = (self.pos, self.mutations)
    return hash(key)
189
235
190
236
def __iter__(self):
    """ Iterate over the contained mutations instead of the tuple's fields.

    Each call returns a fresh iterator; no iteration state is kept on the
    instance.
    """
    members = self.mutations
    return iter(members)
199
238
200
239
def __str__ (self ):
201
240
text = self .wildtype or ''
0 commit comments