Skip to content

Commit 0de0159

Browse files
authored
Merge PR #2787 from sevyharris to parse sources from autogenerated kinetics trees
Update comment parser for autogen trees. The extract_source_from_comments() function was never updated for the autogenerated kinetics trees. This update is required to get the uncertainty tool working again for regular gas-phase RMG mechanisms. Description of Changes: * updated the Disproportionation family in the test database (copied from RMG-database 17cd80b2583ee94158c7585ccf392ceedc9dc020) so we'd have a real autogenerated tree to play with. * updated the H_Abstraction family in the test database (also 17cd80b2) so there would be a hand-made tree with a complicated enough structure to generate all cases of kinetic family source comments. * updated the kineticsTest to support the new families in the test database * added code to parse the kinetics comments from the new autogenerated trees, and I also updated some of the existing parsing code to rely more on regex instead of tokens/splitting lines because it's more robust if the order gets switched around or more information gets added to the comments. I also updated the reconstruct_kinetics_from_source() function in RMG-Py/rmgpy/data/kinetics/database.py to handle ArrheniusBM rules. * added more unit tests for the kinetics comment parsing code to exercise the autogenerated tree comments * updated the uncertainty tool test to use the new values in the parsing_data chemkin file. * updated the isotopesTest to use appropriate templates for the new H_Abstraction family in the test database.
2 parents f921a52 + de45d17 commit 0de0159

File tree

15 files changed

+68487
-5319
lines changed

15 files changed

+68487
-5319
lines changed

rmgpy/data/kinetics/database.py

Lines changed: 41 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -752,42 +752,52 @@ def reconstruct_kinetics_from_source(self, reaction, source, fix_barrier_height=
752752
else:
753753
kinetics = training_entry.data
754754
elif 'Rate Rules' in source:
755-
756755
source_dict = source['Rate Rules'][1]
757756
rules = source_dict['rules']
758757
training = source_dict['training']
759758
degeneracy = source_dict['degeneracy']
760759

761-
log_a = 0
762-
n = 0
763-
alpha = 0
764-
E0 = 0
765-
for rule_entry, weight in rules:
766-
log_a += np.log10(rule_entry.data.A.value_si) * weight
767-
n += rule_entry.data.n.value_si * weight
768-
alpha += rule_entry.data.alpha.value_si * weight
769-
E0 += rule_entry.data.E0.value_si * weight
770-
for rule_entry, training_entry, weight in training:
771-
log_a += np.log10(rule_entry.data.A.value_si) * weight
772-
n += rule_entry.data.n.value_si * weight
773-
alpha += rule_entry.data.alpha.value_si * weight
774-
E0 += rule_entry.data.E0.value_si * weight
775-
776-
a_units = rule_entry.data.A.units
777-
if a_units == 'cm^3/(mol*s)' or a_units == 'cm^3/(molecule*s)' or a_units == 'm^3/(molecule*s)':
778-
a_units = 'm^3/(mol*s)'
779-
elif a_units == 'cm^6/(mol^2*s)' or a_units == 'cm^6/(molecule^2*s)' or a_units == 'm^6/(molecule^2*s)':
780-
a_units = 'm^6/(mol^2*s)'
781-
elif a_units == 's^-1' or a_units == 'm^3/(mol*s)' or a_units == 'm^6/(mol^2*s)':
782-
pass
783-
else:
784-
raise ValueError('Invalid units {0} for averaging kinetics.'.format(a_units))
785-
kinetics = ArrheniusEP(
786-
A=(degeneracy * 10 ** log_a, a_units),
787-
n=n,
788-
alpha=alpha,
789-
E0=(E0 * 0.001, "kJ/mol"),
790-
)
760+
if rules and isinstance(rules[0][0].data, ArrheniusBM):
761+
# This is a rate rule with ArrheniusBM kinetics
762+
assert len(rules) == 1, "There should only be one rate rule for ArrheniusBM kinetics in the autogenerated trees"
763+
kinetics = ArrheniusBM( # have to create a new object to avoid modifying the original when we multiply by degeneracy
764+
A=rules[0][0].data.A,
765+
n=rules[0][0].data.n,
766+
w0=rules[0][0].data.w0,
767+
E0=rules[0][0].data.E0,
768+
)
769+
kinetics.A.value_si *= degeneracy
770+
else: # ArrheniusEP kinetics
771+
log_a = 0
772+
n = 0
773+
alpha = 0
774+
E0 = 0
775+
for rule_entry, weight in rules:
776+
log_a += np.log10(rule_entry.data.A.value_si) * weight
777+
n += rule_entry.data.n.value_si * weight
778+
alpha += rule_entry.data.alpha.value_si * weight
779+
E0 += rule_entry.data.E0.value_si * weight
780+
for rule_entry, training_entry, weight in training:
781+
log_a += np.log10(rule_entry.data.A.value_si) * weight
782+
n += rule_entry.data.n.value_si * weight
783+
alpha += rule_entry.data.alpha.value_si * weight
784+
E0 += rule_entry.data.E0.value_si * weight
785+
a_units = rule_entry.data.A.units
786+
if a_units == 'cm^3/(mol*s)' or a_units == 'cm^3/(molecule*s)' or a_units == 'm^3/(molecule*s)':
787+
a_units = 'm^3/(mol*s)'
788+
elif a_units == 'cm^6/(mol^2*s)' or a_units == 'cm^6/(molecule^2*s)' or a_units == 'm^6/(molecule^2*s)':
789+
a_units = 'm^6/(mol^2*s)'
790+
elif a_units == 's^-1' or a_units == 'm^3/(mol*s)' or a_units == 'm^6/(mol^2*s)':
791+
pass
792+
else:
793+
raise ValueError('Invalid units {0} for averaging kinetics.'.format(a_units))
794+
795+
kinetics = ArrheniusEP(
796+
A=(degeneracy * 10 ** log_a, a_units),
797+
n=n,
798+
alpha=alpha,
799+
E0=(E0 * 0.001, "kJ/mol"),
800+
)
791801
else:
792802
raise ValueError("Source data must be either 'Library', 'PDep','Training', or 'Rate Rules'.")
793803

rmgpy/data/kinetics/family.py

Lines changed: 34 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4442,24 +4442,29 @@ def extract_source_from_comments(self, reaction):
44424442
"""
44434443
lines = reaction.kinetics.comment.split('\n')
44444444

4445-
exact = False
4445+
exact_rule = False
44464446
template = None
44474447
rules = None
44484448
training_entries = None
44494449
degeneracy = 1
44504450

4451-
regex = r"\[(.*)\]" # only hit outermost brackets
4451+
training_reaction_pattern = r'Matched reaction\s*(\d+).*in.*training'
4452+
degeneracy_pattern = r'Multiplied by reaction path degeneracy\s*(\d+)'
4453+
44524454
for line in lines:
4453-
if line.startswith('Matched'):
4455+
training_matches = re.search(training_reaction_pattern, line)
4456+
degeneracy_matches = re.search(degeneracy_pattern, line)
4457+
4458+
if training_matches is not None:
44544459
# Source of the kinetics is from training reaction
4455-
training_reaction_index = int(line.split()[2])
4460+
training_reaction_index = int(training_matches.group(1))
44564461
depository = self.get_training_depository()
44574462
training_entry = depository.entries[training_reaction_index]
44584463
# Perform sanity check that the training reaction's label matches that of the comments
44594464
if training_entry.label not in line:
4460-
raise AssertionError('Reaction {0} uses kinetics from training reaction {1} '
4461-
'but does not match the training reaction {1} from the '
4462-
'{2} family.'.format(reaction, training_reaction_index, self.label))
4465+
raise AssertionError(f'Reaction {reaction} uses kinetics from training reaction {training_reaction_index} '
4466+
f'but does not match the training reaction {training_reaction_index} from the '
4467+
f'{self.label} family.')
44634468

44644469
# Sometimes the matched kinetics could be in the reverse direction.....
44654470
if reaction.is_isomorphic(training_entry.item, either_direction=False, save_order=self.save_order):
@@ -4468,34 +4473,34 @@ def extract_source_from_comments(self, reaction):
44684473
reverse = True
44694474
return True, [self.label, training_entry, reverse]
44704475

4471-
elif line.startswith('Exact match'):
4472-
exact = True
4473-
elif line.startswith('Estimated'):
4474-
pass
4475-
elif line.startswith('Multiplied by'):
4476-
degeneracy = float(line.split()[-1])
4476+
if 'Exact match found for rate rule' in line:
4477+
exact_rule = True
4478+
if degeneracy_matches is not None:
4479+
degeneracy = float(degeneracy_matches.group(1))
44774480

44784481
# Extract the rate rule information
44794482
full_comment_string = reaction.kinetics.comment.replace('\n', ' ')
4480-
4483+
autogen_node_search_pattern = r'Estimated from node (.*)'
44814484
# The rate rule string is right after the phrase 'for rate rule'
4482-
rate_rule_string = full_comment_string.split("for rate rule", 1)[1].strip()
4483-
4484-
if rate_rule_string[0] == '[':
4485-
# Get the contents of the capture group in the regex
4486-
# Remove any spaces which may be left over as a result of a line break
4487-
template_label = re.split(regex, rate_rule_string)[1].replace(' ', '')
4485+
template_pattern = r"for rate rule \[(.*)\]" # only hit outermost brackets
4486+
autogen_node_matches = re.search(autogen_node_search_pattern, full_comment_string)
4487+
template_matches = re.search(template_pattern, full_comment_string)
4488+
if autogen_node_matches is not None: # autogenerated trees
4489+
template_str = autogen_node_matches.group(1).split('Multiplied by reaction path degeneracy')[0].strip()
4490+
tokens = template_str.split()
4491+
if len(tokens) == 2: # The node was probably split because wordwrap was turned off
4492+
assert len(template_str) > 115, 'The node name is too short to have been broken up by the chemkin writer'
4493+
template_str = ''.join(tokens)
4494+
elif len(tokens) > 2: # warn the user the node is probably wrong
4495+
raise ValueError(f'The node name {template_str} has multiple spaces and cannot be parsed for reaction {reaction}.')
4496+
template = self.retrieve_template([template_str])
4497+
elif template_matches is not None: # hand-built trees
4498+
template_label = template_matches.group(1)
4499+
template = self.retrieve_template(template_label.split(';'))
44884500
else:
4489-
# If this has the line 'From training reaction # for rate rule node1;node2'
4490-
template_label = rate_rule_string.split()[0]
4491-
4492-
template = self.retrieve_template(template_label.split(';'))
4501+
raise ValueError(f'Could not find rate rule in comments for reaction {reaction}.')
44934502
rules, training_entries = self.get_sources_for_template(template)
4494-
4495-
if not template:
4496-
raise ValueError('Could not extract kinetics source from comments for reaction {}.'.format(reaction))
4497-
4498-
source_dict = {'template': template, 'degeneracy': degeneracy, 'exact': exact,
4503+
source_dict = {'template': template, 'degeneracy': degeneracy, 'exact': exact_rule,
44994504
'rules': rules, 'training': training_entries}
45004505

45014506
# Source of the kinetics is from rate rules

0 commit comments

Comments
 (0)