Skip to content

Commit d2ce277

Browse files
committed
Merge branch 'hcv_rules'
2 parents d2eff59 + 50021bc commit d2ce277

File tree

2 files changed

+544
-0
lines changed

2 files changed

+544
-0
lines changed

pyvdrm/hcvr.py

+345
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,345 @@
1+
"""
2+
HCV Drug Resistance Rule Parser definition
3+
"""
4+
5+
from functools import reduce, total_ordering
6+
from pyparsing import (Literal, nums, Word, Forward, Optional, Regex,
7+
infixNotation, delimitedList, opAssoc, alphas)
8+
from pyvdrm.drm import AsiExpr, AsiBinaryExpr, AsiUnaryExpr, DRMParser
9+
from pyvdrm.vcf import MutationSet
10+
11+
def update_flags(fst, snd):
    """Merge the flag dictionary ``snd`` into ``fst`` and return ``fst``.

    Both dictionaries map a flag name to a list of supporting items.
    ``fst`` is updated in place; ``snd`` and the lists it holds are left
    untouched.

    :param dict fst: flags to update (mutated and returned)
    :param dict snd: flags to merge into ``fst``
    :return: ``fst``
    """
    for name, supporting in snd.items():
        # extend() keeps each value a flat list (append() would nest the
        # incoming list inside the existing one), and setdefault() starts
        # from a fresh list so fst never shares a list object with snd.
        fst.setdefault(name, []).extend(supporting)
    return fst
18+
19+
20+
def maybe_foldl(func, noneable):
    """Left-fold ``func`` over the non-None items of ``noneable``.

    :param func: binary function combining the accumulator and next item
    :param noneable: iterable that may contain None entries, or None
    :return: the folded value, or None when ``noneable`` is None or holds
        no non-None items
    """
    if noneable is None:
        return None

    # Drop the null entries up front; the iterable is fully consumed here.
    present = [item for item in noneable if item is not None]
    if len(present) == 0:
        return None

    folded = present[0]
    for item in present[1:]:
        folded = func(folded, item)
    return folded
29+
30+
31+
def maybe_map(func, noneable):
    """Apply ``func`` to the non-None items of ``noneable``.

    None inputs are skipped, and so are None results.

    :param func: unary function applied to each non-None item
    :param noneable: iterable that may contain None entries, or None
    :return: list of non-None results, or None when ``noneable`` is None
        or no result survives
    """
    if noneable is None:
        return None

    # Lazily apply func in order, then keep only the non-null results.
    outcomes = (func(item) for item in noneable if item is not None)
    kept = [outcome for outcome in outcomes if outcome is not None]
    return kept or None
45+
46+
47+
@total_ordering
class Score(object):
    """Encapsulate a score and the residues that support it.

    Equality and ordering compare the ``score`` value only; residues and
    flags do not take part in comparisons.
    """

    # Class-level defaults; every instance overwrites them in __init__.
    residues = set([])
    score = None
    flags = {}  # allow a score expression to raise a user defined string

    def __init__(self, score, residues, flags=None):
        """ Initialize.

        :param bool|float score: value of the score
        :param residues: sequence of Mutations
        :param flags: dictionary of user defined strings and supporting
            Mutations; a new empty dictionary is used when omitted
        """
        self.score = score
        self.residues = set(residues)
        # A None sentinel instead of a ``{}`` default: a mutable default is
        # created once and would be shared by every Score built without
        # flags.
        self.flags = {} if flags is None else flags

    def _merged_flags(self, other):
        """Return the flags of ``self`` and ``other`` merged into a new dict.

        The merge works on a copy so that combining two scores never
        modifies the flags of the left-hand operand.
        """
        own = {name: list(items) for name, items in self.flags.items()}
        return update_flags(own, other.flags)

    def __add__(self, other):
        """Sum the scores, union the residues and merge the flags."""
        return Score(self.score + other.score, self.residues | other.residues,
                     self._merged_flags(other))

    def __sub__(self, other):
        """Subtract the scores, union the residues and merge the flags."""
        return Score(self.score - other.score, self.residues | other.residues,
                     self._merged_flags(other))

    def __repr__(self):
        return "Score({!r}, {!r})".format(self.score, self.residues)

    def __eq__(self, other):
        if not isinstance(other, Score):
            return NotImplemented
        return self.score == other.score

    def __lt__(self, other):
        # the total_ordering decorator populates the other comparison
        # operations. Implement them explicitly if this causes performance
        # issues
        if not isinstance(other, Score):
            return NotImplemented
        return self.score < other.score

    def __bool__(self):
        # __bool__ must return a real bool; returning an integer score
        # makes bool(score) raise TypeError.
        return bool(self.score)
90+
91+
92+
class Negate(AsiExpr):
    """Logical NOT applied to a single boolean child expression"""

    def __call__(self, mutations):
        """Evaluate the first child on ``mutations`` and invert its score.

        :param mutations: value handed unchanged to the child expression
        :return: Score holding the inverted boolean and the child's residues
        """
        operand = self.children[0]
        outcome = operand(mutations)
        if outcome is not None:
            return Score(not outcome.score, outcome.residues)
        # The child produced no result, so the negation holds.
        return Score(True, [])  # TODO: propagate negative residues
99+
100+
101+
class BoolTrue(AsiExpr):
    """Constant expression that always evaluates to True"""

    def __call__(self, *args):
        """Ignore the arguments and return a True score with no residues."""
        return Score(score=True, residues=[])
105+
106+
107+
class BoolFalse(AsiExpr):
    """Constant expression that always evaluates to False"""

    def __call__(self, *args):
        """Ignore the arguments and return a False score with no residues."""
        return Score(score=False, residues=[])
111+
112+
113+
class AndExpr(AsiExpr):
    """Boolean AND folded over every child expression"""

    def __call__(self, mutations):
        """Evaluate all operands and AND their scores together.

        :param mutations: value handed unchanged to each operand
        :return: ``Score(False, [])`` as soon as one operand is falsy,
            otherwise a True score carrying the union of all residues
        :raises ValueError: when there are no operands
        """
        # The operands are stored as a group in the first child.
        operands = self.children[0]

        scores = []
        for operand in operands:
            outcome = operand(mutations)
            # an operand that yields nothing counts as False
            scores.append(Score(False, []) if outcome is None else outcome)

        if len(scores) == 0:
            raise ValueError

        if any(not item.score for item in scores):
            return Score(False, [])

        return Score(True, set().union(*(item.residues for item in scores)))
129+
130+
131+
class OrExpr(AsiBinaryExpr):
    """Boolean OR of exactly two child expressions"""

    def __call__(self, mutations):
        """Evaluate both operands and OR their scores.

        :param mutations: value handed unchanged to each operand
        :return: Score with ``left.score or right.score`` and the union of
            both operands' residues
        """
        lhs, rhs = self.children

        # Both sides are always evaluated; a missing result counts as False.
        left, right = [
            Score(False, []) if outcome is None else outcome
            for outcome in (lhs(mutations), rhs(mutations))
        ]

        combined_residues = left.residues | right.residues
        return Score(left.score or right.score, combined_residues)
147+
148+
149+
class EqualityExpr(AsiExpr):
    """ASI2 style inequality expressions"""

    def __init__(self, label, pos, children):
        """Store the quantifier keyword and its integer limit.

        :param label: passed through to the base class
        :param pos: passed through to the base class
        :param children: two tokens, the quantifier keyword and the limit
        """
        super().__init__(label, pos, children)
        self.operation, limit = children
        self.limit = int(limit)

    def __call__(self, x):
        """Compare ``x`` against the limit using the stored quantifier.

        :param x: count to test
        :return: result of the comparison
        :raises NotImplementedError: for an unknown quantifier keyword
        """
        if self.operation == 'ATLEAST':
            return x >= self.limit
        if self.operation == 'EXACTLY':
            return x == self.limit
        # The grammar in this file emits 'NOTMORETHAN'; the previously
        # checked spelling 'NOMORETHAN' is still accepted so that existing
        # callers constructing it directly keep working.
        if self.operation in ('NOTMORETHAN', 'NOMORETHAN'):
            return x <= self.limit

        raise NotImplementedError(self.operation)
166+
167+
168+
class ScoreExpr(AsiExpr):
    """Score expressions propagate DRM scores"""

    def __call__(self, mutations):
        """Evaluate the condition and attach the configured score to it.

        The children take one of three shapes:
        ``(operation, score)``, ``(operation, '-', score)`` or
        ``(operation, quote, flag, quote)``.

        :param mutations: value handed unchanged to the condition
        :return: None when the condition yields None, ``Score(0, [])`` when
            its score is False, otherwise a Score with the configured
            value, the condition's residues and any flag
        :raises ValueError: for any other number of children, or when the
            middle token of a three-token form is not ``'-'``
        """
        flags = {}
        arity = len(self.children)

        if arity == 2:
            operation, raw = self.children
            score = int(raw)

        elif arity == 3:
            # this is parsing the expression twice, refactor
            operation, sign, raw = self.children
            if sign != '-':
                raise ValueError
            score = -1 * int(raw)

        elif arity == 4:
            operation, _, flag, _ = self.children
            flags[flag] = []
            score = 0  # should be None

        else:
            raise ValueError

        # evaluate operation and return score
        outcome = operation(mutations)
        if outcome is None:
            return None
        if outcome.score is False:
            return Score(0, [])
        return Score(score, outcome.residues, flags=flags)
200+
201+
202+
class ScoreList(AsiExpr):
    """Lists of scores are either summed or maxed"""

    def __call__(self, mutations):
        """Evaluate the score items and combine them.

        When the first child equals ``'MAX'`` the remaining children are
        evaluated and the largest score is kept; otherwise every child is
        evaluated and the scores are added.

        :param mutations: value handed unchanged to each score item
        :return: the combined Score, or None when no item yields a score
        """
        head, *tail = self.children

        if head == 'MAX':
            targets, combine = tail, max
        else:
            # the default operation is sum, over all children
            targets, combine = self.children, (lambda acc, nxt: acc + nxt)

        return maybe_foldl(combine, [item(mutations) for item in targets])
212+
213+
214+
class SelectFrom(AsiExpr):
    """Return True if some number of mutations match"""

    def typecheck(self, tokens):
        # if type(tokens[0]) != EqualityExpr:
        #     raise TypeError()
        pass

    def __call__(self, mutations):
        """Count the matching residues and test the count.

        The first child is the quantifier expression; the remaining
        children are the residue expressions to evaluate.

        :param mutations: value handed unchanged to each residue expression
        :return: a True Score carrying the union of the matching residues
            when the quantifier accepts the count, otherwise None
        """
        operation, *rest = self.children
        # the head of the arg list must be an equality expression

        # A residue matches only when it yields a score that is truthy; a
        # None result or a False score (e.g. from a negation) is not a
        # match. Building the list directly also covers the case where
        # nothing matches at all.
        matched = []
        for expr in rest:
            outcome = expr(mutations)
            if outcome is not None and outcome.score:
                matched.append(outcome)

        if not operation(len(matched)):
            return None

        # Union the residue sets themselves so this works for any number
        # of matches, including zero.
        residues = set().union(*(item.residues for item in matched))
        return Score(True, residues)
234+
235+
236+
class AsiScoreCond(AsiExpr):
    """Score condition"""

    label = "ScoreCond"

    def __call__(self, args):
        """Evaluate every child on ``args`` and add up the resulting scores.

        :param args: value handed unchanged to each child expression
        :return: the summed Score, or None when no child yields a score
        """
        def add(left, right):
            return left + right

        evaluated = (child(args) for child in self.children)
        return maybe_foldl(add, evaluated)
244+
245+
246+
class AsiMutations(object):
    """List of mutations given an ambiguous pattern"""

    def __init__(self, _label=None, _pos=None, args=None):
        """Initialize set of mutations from a potentially ambiguous residue

        :param _label: unused
        :param _pos: unused
        :param args: string fragments that are joined and handed to
            MutationSet; when missing or empty no mutation set is built
        """
        # A conditional expression instead of ``args and ...``: the latter
        # stores the falsy ``args`` object itself (e.g. an empty token
        # list) rather than None.
        self.mutations = MutationSet(''.join(args)) if args else None

    def __repr__(self):
        if self.mutations is None:
            return "AsiMutations()"
        return "AsiMutations(args={!r})".format(str(self.mutations))

    def __call__(self, env):
        """Look for the first mutation set in ``env`` sharing a mutation.

        :param env: iterable of objects exposing a ``mutations`` set
        :return: a True Score carrying the shared mutations of the first
            overlapping set, or None when nothing overlaps
        """
        if self.mutations is None:
            # nothing to match against
            return None
        for mutation_set in env:
            intersection = self.mutations.mutations & mutation_set.mutations
            if intersection:
                return Score(True, intersection)
        return None
265+
266+
267+
class HCVR(DRMParser):
    """HCV Resistance Syntax definition"""

    def parser(self, rule):
        """Build the HCV rule grammar and parse ``rule`` with it.

        :param str rule: text of the rule to parse
        :return: the result of ``parseString`` on the top-level statement
        """

        # Keywords and punctuation
        select = Literal('SELECT').suppress()
        except_ = Literal('EXCEPT')
        exactly = Literal('EXACTLY')
        atleast = Literal('ATLEAST')

        from_ = Literal('FROM').suppress()

        max_ = Literal('MAX')

        and_ = Literal('AND').suppress()
        or_ = Literal('OR').suppress()
        # min_ = Literal('MIN')

        notmorethan = Literal('NOTMORETHAN')
        l_par = Literal('(').suppress()
        r_par = Literal(')').suppress()

        quote = Literal('"')

        mapper = Literal('=>').suppress()
        integer = Word(nums)

        # Residues: an optional letter, a position, then one or more of
        # the characters d, i, A-Z
        mutation = Optional(Regex(r'[A-Z]')) + integer + Regex(r'[diA-Z]+')
        mutation.setParseAction(AsiMutations)

        not_ = Literal('NOT').suppress() + mutation
        not_.setParseAction(Negate)

        residue = mutation | not_
        # integer + l_par + not_ + Regex(r'[A-Z]+') + r_par
        # roll this next rule into the mutation object

        # Syntax of expressions
        excludestatement = except_ + residue

        quantifier = exactly | atleast | notmorethan
        inequality = quantifier + integer
        inequality.setParseAction(EqualityExpr)

        select_quantifier = infixNotation(inequality,
                                          [(and_, 2, opAssoc.LEFT, AndExpr),
                                           (or_, 2, opAssoc.LEFT, OrExpr)])

        residue_list = l_par + delimitedList(residue) + r_par

        # so selectstatement.eval :: [Mutation] -> Maybe Bool
        selectstatement = select + select_quantifier + from_ + residue_list
        selectstatement.setParseAction(SelectFrom)

        bool_ = (Literal('TRUE').suppress().setParseAction(BoolTrue) |
                 Literal('FALSE').suppress().setParseAction(BoolFalse))

        booleancondition = Forward()
        condition = residue | excludestatement | selectstatement | bool_

        # ``<<`` binds tighter than ``|``, so the former
        # ``booleancondition << infixNotation(...) | condition`` assigned
        # only the infixNotation expression and threw the alternative away.
        # ``<<=`` states that assignment explicitly; infixNotation already
        # accepts a bare operand, so no separate alternative is needed.
        booleancondition <<= infixNotation(condition,
                                           [(and_, 2, opAssoc.LEFT, AndExpr),
                                            (or_, 2, opAssoc.LEFT, OrExpr)])

        # Score items: ``condition => [-]integer`` or ``condition => "flag"``
        score = (Optional(Literal('-')) + integer) | (quote + Word(alphas) + quote)
        scoreitem = booleancondition + mapper + score
        scoreitem.setParseAction(ScoreExpr)
        scorelist = ((max_ + l_par + delimitedList(scoreitem) + r_par) |
                     delimitedList(scoreitem))
        scorelist.setParseAction(ScoreList)

        scorecondition = (Literal('SCORE FROM').suppress() +
                          l_par + delimitedList(scorelist) + r_par)

        scorecondition.setParseAction(AsiScoreCond)

        statement = booleancondition | scorecondition

        return statement.parseString(rule)

0 commit comments

Comments
 (0)