From dc9c305b3d92f031d7e61325da9cde9ce2679db2 Mon Sep 17 00:00:00 2001 From: Connie Gao Date: Fri, 12 Feb 2016 18:32:15 -0500 Subject: [PATCH 1/4] Add .ipynb_checkpoints to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index a0ebe13c06..76ecb493ad 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ # C-Compiled scripts *.pyc + +*.ipynb_checkpoints \ No newline at end of file From cec77ccff916a85c1540b3e0fc215e3068f316ba Mon Sep 17 00:00:00 2001 From: Connie Gao Date: Sun, 28 Feb 2016 21:30:40 -0500 Subject: [PATCH 2/4] fitPolycyclicThermoGroupsFromThermoLibrary script added This script takes thermo libraries and fits new polycyclic groups from them. It saves the all of the polycyclic groups to the file `new_polycyclic.py`. This file can be used to overwrite the original polycyclics thermo groups file in `input/thermo/groups/polycyclic.py`. **IMPORTANT:** It averages any data that is found within the libraries, but will overwrite any old thermo data. If old data is trustworthy, this script must be modified Uncertainties for the groups are calculated as 2s, where s is the sample standard deviation --- ...ycyclicThermoGroupsFromThermoLibrary.ipynb | 414 ++++++++++++++++++ 1 file changed, 414 insertions(+) create mode 100644 scripts/fitPolycyclicThermoGroupsFromThermoLibrary.ipynb diff --git a/scripts/fitPolycyclicThermoGroupsFromThermoLibrary.ipynb b/scripts/fitPolycyclicThermoGroupsFromThermoLibrary.ipynb new file mode 100644 index 0000000000..912bc738a2 --- /dev/null +++ b/scripts/fitPolycyclicThermoGroupsFromThermoLibrary.ipynb @@ -0,0 +1,414 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fit Polycyclic Thermo Groups From Thermo Library Script\n", + "\n", + "This script takes thermo libraries and fits new polycyclic groups from them. It saves the all of the polycyclic groups to the file `new_polycyclic.py`. This file can be used to overwrite the original polycyclics thermo groups file in `input/thermo/groups/polycyclic.py`.\n", + "\n", + "**IMPORTANT:** It averages any data that is found within the libraries, but will overwrite any old thermo data. If old data is trustworthy, this script must be modified\n", + " \n", + "Uncertainties for the groups are calculated as 2s, where s is the sample standard deviation\n", + "\n", + "Please fill in the list of thermo libraries in the next block below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Fill in the list of thermo libraries to be used for fitting polycyclic thermo groups\n", + "thermoLibraries = ['vinylCPD_H','C3','C10H11','Fulvene_H','naphthalene_H']\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "import copy\n", + "import numpy\n", + "from IPython.display import Image, display\n", + "from rmgpy.data.thermo import *\n", + "from rmgpy.data.base import Entry\n", + "from rmgpy.data.rmg import RMGDatabase\n", + "from rmgpy import settings\n", + "from rmgpy.species import Species\n", + "from rmgpy.molecule import Molecule\n", + "from rmgpy.cantherm.output import prettify" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Variety of helper functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def extractPolycyclicGroups(molecule):\n", + " \"\"\"\n", + " Extract polycyclic functional groups from a real molecule\n", + " \"\"\"\n", + " struct = molecule.copy(deep=True)\n", + " # Saturate the structure if it is a radical\n", + " if struct.isRadical():\n", + " struct.saturate()\n", + " struct.deleteHydrogens()\n", + " \n", + " polyRings = struct.getPolycyclicRings()\n", + " groups = [convertCycleToGroup(ring) for ring in polyRings]\n", + " \n", + " return groups\n", + " \n", + "def convertCycleToGroup(cycle):\n", + " \"\"\"\n", + " This function converts a list of atoms in a cycle to a functional Group object\n", + " \"\"\"\n", + " from rmgpy.molecule.group import GroupAtom, GroupBond, Group\n", + " \n", + " # Create GroupAtom object for each atom in the cycle, label the first one in the cycle with a *\n", + " groupAtoms = {}\n", + " bonds = []\n", + " for atom in cycle:\n", + " groupAtoms[atom] = GroupAtom(atomType=[atom.atomType],\n", + " radicalElectrons=[0],\n", + " label='*' if cycle.index(atom)==0 else '')\n", + " \n", + " group = Group(atoms=groupAtoms.values()) \n", + " \n", + " # Create GroupBond for each bond between atoms in the cycle, but not outside of the cycle\n", + " for atom in cycle:\n", + " for bondedAtom, bond in atom.edges.iteritems():\n", + " if bondedAtom in cycle:\n", + " # create a group bond with the same bond order as in the original molecule,\n", + " # if it hasn't already been created\n", + " if not group.hasBond(groupAtoms[atom],groupAtoms[bondedAtom]):\n", + " group.addBond(GroupBond(groupAtoms[atom],groupAtoms[bondedAtom],order=[bond.order]))\n", + " else:\n", + " pass\n", + " \n", + " group.update()\n", + " \n", + " return group" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Thermo comparison and display functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def displayThermo(thermoData):\n", + " print 'H298 = {0} kcal/mol'.format(thermoData.H298.value_si/4184)\n", + " print 'S298 = {0} cal/mol*K'.format(thermoData.S298.value_si/4.184)\n", + "def compareThermoData(thermoData1, thermoData2):\n", + " delH = thermoData1.H298.value_si - thermoData2.H298.value_si\n", + " print 'Difference in H298 = {0} kcal/mol'.format(delH/4184)\n", + " delS = thermoData1.S298.value_si - thermoData2.S298.value_si\n", + " print 'Difference S298 = {0} cal/mol*K'.format(delS/4.184)\n", + " #Tdata = [300,500,1000,2000]\n", + " #for T in Tdata:\n", + " # delCp = thermoData1.getHeatCapacity(T) - thermoData2.getHeatCapacity(T)\n", + " # print 'Difference in Cp at {0} = {1} cal/mol*K'.format(T, delCp/4.184)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load all the thermo libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "database = RMGDatabase()\n", + "database.load(settings['database.directory'], thermoLibraries = thermoLibraries, kineticsFamilies='none', kineticsDepositories='none', reactionLibraries=[])\n", + "\n", + "thermoDatabase = database.thermo\n", + "thermoDatabase0 = copy.deepcopy(database.thermo)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "## Extract polycyclic groups from thermo libraries and create new ones" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fittingDictionary={}\n", + "for libraryName in thermoLibraries:\n", + " thermoLibrary = database.thermo.libraries[libraryName]\n", + " for label, entry in thermoLibrary.entries.iteritems():\n", + " molecule = entry.item\n", + " libraryThermoData = entry.data\n", + " if molecule.getAllPolycyclicVertices():\n", + " print label\n", + " species = Species(molecule=[molecule])\n", + " species.generateResonanceIsomers() \n", + " print 'Species has {0} resonance isomers'.format(len(species.molecule))\n", + " estimatedThermos = [thermoDatabase.estimateThermoViaGroupAdditivity(molecule) for molecule in species.molecule]\n", + " for estimatedThermo in estimatedThermos:\n", + " index = estimatedThermos.index(estimatedThermo)\n", + " ringGroups, polycyclicGroups = database.thermo.getRingGroupsFromComments(estimatedThermo)\n", + " \n", + " if len(polycyclicGroups) == 0:\n", + " raise Exception('Species {0} detected as polycyclic but estimated thermo contained no polycyclic groups: \\\n", + " you need to create a new polycyclic group'.format(label))\n", + "\n", + " elif len(polycyclicGroups) == 1:\n", + " polycyclicGroup = polycyclicGroups[0]\n", + " print 'Molecule {0} has a single polycyclic group match in thermo estimate.'.format(species.molecule[index].toSMILES())\n", + " # Draw the molecule in ipython notebook\n", + " display(species.molecule[index])\n", + " print 'Molecule SMILES: {0}'.format(species.molecule[index].toSMILES())\n", + " print 'Estimated thermo data:'\n", + " print prettify(repr(estimatedThermo))\n", + " displayThermo(estimatedThermo)\n", + "\n", + " withoutPolycyclicGroupThermo = removeThermoData(copy.deepcopy(estimatedThermo), polycyclicGroup.data)\n", + " newPolycyclicGroupThermo = removeThermoData(copy.deepcopy(libraryThermoData), withoutPolycyclicGroupThermo)\n", + "\n", + "\n", + " # Check to make sure that the polycyclic group is not generic\n", + " # If it is, create a new polycyclicGroup as the child\n", + " if polycyclicGroup.label == 'PolycyclicRing':\n", + " groups = extractPolycyclicGroups(species.molecule[index])\n", + " print groups[0].toAdjacencyList()\n", + " assert len(groups) == 1\n", + " # Create a new entry in the polycyclic groups with the same name as the thermo library entry\n", + " # Make sure it does not already exist\n", + " entryLabel = label\n", + " counter = 0\n", + " while entryLabel in thermoDatabase.groups['polycyclic'].entries:\n", + " counter += 1\n", + " entryLabel = entryLabel.split('-')[0]\n", + " entryLabel += '-{0}'.format(counter)\n", + "\n", + "\n", + " print 'Polycyclic group was found to be generic \"PolycyclicRing\". Creating new entry: {0}.'.format(entryLabel)\n", + " thermoDatabase.groups['polycyclic'].entries[entryLabel] = Entry(index = len(thermoDatabase.groups['polycyclic'].entries)+1,\n", + " label = entryLabel,\n", + " item = groups[0],\n", + " data = polycyclicGroup.data, # Use dummy thermo here so other estimates can find this group\n", + " parent = polycyclicGroup,\n", + " )\n", + "\n", + " # Set the new entry as the polycyclicGroup and make it a child of the generic group\n", + " polycyclicGroup = thermoDatabase.groups['polycyclic'].entries[entryLabel] \n", + " thermoDatabase.groups['polycyclic'].entries['PolycyclicRing'].children.append(polycyclicGroup)\n", + "\n", + "\n", + " else:\n", + " print 'Matched polycyclic group \"{0}\"'.format(polycyclicGroup.label)\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " # Add the new group value to the fitting dictionary\n", + " if polycyclicGroup not in fittingDictionary:\n", + " # Add a tuple containing fitted group data, the original library entry, and thermo library\n", + " fittingDictionary[polycyclicGroup]=[(newPolycyclicGroupThermo, entry, thermoLibrary)]\n", + " else:\n", + " fittingDictionary[polycyclicGroup].append((newPolycyclicGroupThermo, entry, thermoLibrary))\n", + "\n", + " elif len(polycyclicGroups) > 1:\n", + " print 'Species {0} has matched multiple polycyclic groups. \\\n", + " This cannot be fitted with a single molecule\\'s thermo data.'.format(label)\n", + " raise Exception\n", + " print '====================='" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fit the polycyclic groups and write to file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for polycyclicGroup, fittingGroups in fittingDictionary.iteritems():\n", + " print 'Original thermo data for polycyclic group: {0}'.format(polycyclicGroup.label)\n", + " if polycyclicGroup.data:\n", + " print prettify(repr(polycyclicGroup.data))\n", + " else:\n", + " print 'No data found. Was created as a new group.'\n", + " \n", + " thermoDataset = [fitTuple[0] for fitTuple in fittingGroups]\n", + " labels = [fitTuple[1].label for fitTuple in fittingGroups]\n", + " libraryLabels = [fitTuple[2].name for fitTuple in fittingGroups]\n", + " # Average the new group values to fit the original polycyclic group\n", + " fittedGroupData = averageThermoData([fitTuple[0] for fitTuple in fittingGroups])\n", + " #print fittedGroupData\n", + " #print fittingGroups\n", + " polycyclicGroup.data = fittedGroupData\n", + " polycyclicGroup.shortDesc = \"Fitted from thermo library values\"\n", + " \n", + " comment = ''\n", + " for i in range(len(labels)):\n", + " comment += \"Fitted from species {0} from {1} library.\\n\".format(labels[i],libraryLabels[i])\n", + " polycyclicGroup.longDesc = comment.strip()\n", + " \n", + " print 'Fitted thermo data for polycyclic group: {0}'.format(polycyclicGroup.label)\n", + " print prettify(repr(polycyclicGroup.data))\n", + " print polycyclicGroup.longDesc\n", + " print '===================================================================='\n", + " # At this point, save and overwrite the entire polycyclic thermo library\n", + "\n", + "thermoDatabase.groups['polycyclic'].save('new_polycyclic.py')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Benchmark the new groups by checking the estimates against library values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Test that the new group additivity values can estimate the old library ones.\n", + "\n", + "for libraryName in thermoLibraries:\n", + " thermoLibrary = database.thermo.libraries[libraryName]\n", + " for label, entry in thermoLibrary.entries.iteritems():\n", + " molecule = entry.item\n", + " libraryThermoData = entry.data\n", + "\n", + " if molecule.getAllPolycyclicVertices():\n", + " species = Species(molecule=[molecule])\n", + " species.generateResonanceIsomers()\n", + " print label\n", + " display (species.molecule[0])\n", + " print 'Species has {0} resonance isomer(s)'.format(len(species.molecule))\n", + " thermoDatabase.findCp0andCpInf(species, libraryThermoData)\n", + " \n", + " estimatedThermo = thermoDatabase.getThermoDataFromGroups(species)\n", + " originalEstimatedThermo = thermoDatabase0.getThermoDataFromGroups(species)\n", + " if libraryThermoData.isIdenticalTo(estimatedThermo):\n", + " print 'Library thermo data for species {0} matches the estimate from group additivity.\\n'.format(label)\n", + " \n", + " print 'Library thermo data:' \n", + " displayThermo(libraryThermoData)\n", + " print '' \n", + " \n", + " print 'Original estimated thermo data:' \n", + " ringGroups, polycyclicGroups = database.thermo.getRingGroupsFromComments(estimatedThermo)\n", + " print 'Polycyclic groups: {0}'.format(' '.join([grp.label for grp in polycyclicGroups]))\n", + " displayThermo(originalEstimatedThermo)\n", + " print ''\n", + " \n", + " print 'Comparison of library thermo with original estimated thermo'\n", + " compareThermoData(libraryThermoData,originalEstimatedThermo)\n", + " print ''\n", + " else:\n", + " print 'Library thermo data for species {0} does not match the estimate from group additivity\\n'.format(label)\n", + " \n", + " print 'Library thermo data:' \n", + " displayThermo(libraryThermoData)\n", + " print '' \n", + " \n", + " print 'Estimated thermo data:' \n", + " \n", + " ringGroups, polycyclicGroups = database.thermo.getRingGroupsFromComments(estimatedThermo)\n", + " print 'Polycyclic groups: {0}'.format(' '.join([grp.label for grp in polycyclicGroups]))\n", + " displayThermo(estimatedThermo)\n", + " print ''\n", + " \n", + " print 'Comparison of library thermo with estimated thermo'\n", + " compareThermoData(libraryThermoData,estimatedThermo)\n", + " print ''\n", + " \n", + " print 'Original estimated thermo data:' \n", + " \n", + " ringGroups, polycyclicGroups = database.thermo.getRingGroupsFromComments(originalEstimatedThermo)\n", + " print 'Polycyclic groups: {0}'.format(' '.join([grp.label for grp in polycyclicGroups]))\n", + " displayThermo(originalEstimatedThermo)\n", + " print ''\n", + " \n", + " print 'Comparison of library thermo with original estimated thermo'\n", + " compareThermoData(libraryThermoData,originalEstimatedThermo)\n", + " print ''\n", + " print '======================='" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 70daf53b33ab88d4deb32a2300da796a7cafd3f9 Mon Sep 17 00:00:00 2001 From: Connie Gao Date: Sun, 28 Feb 2016 21:54:19 -0500 Subject: [PATCH 3/4] Add convertKineticsLibraryToTrainingReactions script Specify the kinetics library name below and run the script. It automatically overwrites the training reactions files it needs to. Then you should commit those files. This script only trains safely. In other words, if a single match from an RMG family is found, a training reaction is created. Sometimes, there are no matches from RMG reaction families, or multiple matches. This indicates an error that requires manual fixing, and a printout is given in the script. --- ...rtKineticsLibraryToTrainingReactions.ipynb | 394 ++++++++++++++++++ 1 file changed, 394 insertions(+) create mode 100644 scripts/convertKineticsLibraryToTrainingReactions.ipynb diff --git a/scripts/convertKineticsLibraryToTrainingReactions.ipynb b/scripts/convertKineticsLibraryToTrainingReactions.ipynb new file mode 100644 index 0000000000..c5f605caa2 --- /dev/null +++ b/scripts/convertKineticsLibraryToTrainingReactions.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convert Kinetics Library to Training Reactions Script\n", + "\n", + "Specify the kinetics library name below and run the script. It automatically overwrites the training reactions files it needs to. Then you should commit those files.\n", + "\n", + "This script only trains safely. In other words, if a single match from an RMG family is found, a training reaction is created. Sometimes, there are no matches from RMG reaction families, or multiple matches. This indicates an error that requires manual fixing, and a printout is given in the script." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "libraryName = 'vinylCPD_H'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from rmgpy.data.rmg import RMGDatabase\n", + "from rmgpy.chemkin import saveChemkinFile, saveSpeciesDictionary\n", + "from rmgpy.rmg.model import Species\n", + "from rmgpy import settings\n", + "from convertKineticsLibraryToTrainingReactions import addAtomLabelsForReaction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## load lib_rxn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "database = RMGDatabase()\n", + "database.load(settings['database.directory'], kineticsFamilies='all', reactionLibraries = [libraryName], kineticsDepositories='all')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## generate fam_rxn, spec replacement and get reactionDict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [], + "source": [ + "reactionDict = {}\n", + "kineticLibrary = database.kinetics.libraries[libraryName]\n", + "for index, entry in kineticLibrary.entries.iteritems():\n", + " lib_rxn = entry.item\n", + " lib_rxn.kinetics = entry.data \n", + " lib_rxn.index = entry.index\n", + " lib_rxn.kinetics.comment = entry.label # Assign the entry's label to the comment\n", + " # Let's make RMG try to generate this reaction from the families!\n", + " fam_rxn_list = []\n", + " rxt_mol_mutation_num = 1\n", + " pdt_mol_mutation_num = 1\n", + " for reactant in lib_rxn.reactants:\n", + " rxt_mol_mutation_num *= len(reactant.molecule)\n", + "\n", + " for product in lib_rxn.products:\n", + " pdt_mol_mutation_num *= len(product.molecule)\n", + "\n", + " for mutation_i in range(rxt_mol_mutation_num):\n", + " rxts_mol = [spc.molecule[mutation_i%(len(spc.molecule))] for spc in lib_rxn.reactants]\n", + " pdts_mol = [spc.molecule[0] for spc in lib_rxn.products]\n", + " fam_rxn_list.extend(database.kinetics.generateReactionsFromFamilies(\n", + " reactants=rxts_mol, products=pdts_mol))\n", + "\n", + "\n", + " if len(fam_rxn_list) == 1:\n", + " fam_rxn = fam_rxn_list[0]\n", + "\n", + " # danger: the fam_rxn may have switched the reactants with products\n", + " # fam_rxn is survived from def filterReactions\n", + " # so it's matched with lib_rxn only we have to \n", + " # determine the direction\n", + " lib_reactants = [r for r in lib_rxn.reactants] \n", + " fam_reactants = [r for r in fam_rxn.reactants]\n", + " for lib_reactant in lib_reactants:\n", + " for fam_reactant in fam_reactants:\n", + " if lib_reactant.isIsomorphic(fam_reactant):\n", + " fam_reactants.remove(fam_reactant)\n", + " break\n", + "\n", + " lib_products = [r for r in lib_rxn.products] \n", + " fam_products = [r for r in fam_rxn.products]\n", + " for lib_product in lib_products:\n", + " for fam_product in fam_products:\n", + " if lib_product.isIsomorphic(fam_product):\n", + " fam_products.remove(fam_product)\n", + " break\n", + "\n", + " forward = not (len(fam_reactants) != 0 or len(fam_products) != 0)\n", + " # find the labeled atoms using family and reactants & products from fam_rxn \n", + " addAtomLabelsForReaction(fam_rxn, database)\n", + " # species replacement so that labeledAtoms is retored\n", + " if forward:\n", + " lib_rxn.reactants = fam_rxn.reactants\n", + " lib_rxn.products = fam_rxn.products\n", + " else:\n", + " lib_rxn.reactants = fam_rxn.products\n", + " lib_rxn.products = fam_rxn.reactants\n", + " if fam_rxn.family in reactionDict:\n", + " reactionDict[fam_rxn.family].append(lib_rxn)\n", + " else:\n", + " reactionDict[fam_rxn.family] = [lib_rxn]\n", + "\n", + " elif len(fam_rxn_list) == 0:\n", + " print \"Sad :( There are no matches. This is a magic reaction or has chemistry that should be made into a new reaction family\"\n", + " print ''\n", + " print lib_rxn\n", + " print ''\n", + " print 'Reactant SMILES:'\n", + " for reactant in lib_rxn.reactants:\n", + " print reactant.molecule[0].toSMILES()\n", + " print 'Product SMILES:'\n", + " for product in lib_rxn.products:\n", + " print product.molecule[0].toSMILES()\n", + " print '==============='\n", + " else:\n", + " print \"There are multiple RMG matches for this reaction. You have to manually create this training reaction\"\n", + " print ''\n", + " print lib_rxn\n", + " print ''\n", + " print 'Reactant SMILES:'\n", + " for reactant in lib_rxn.reactants:\n", + " print reactant.molecule[0].toSMILES()\n", + " print 'Product SMILES'\n", + " for product in lib_rxn.products:\n", + " print product.molecule[0].toSMILES()\n", + " print ''\n", + " print 'The following families were matched:'\n", + " for rxn in fam_rxn_list:\n", + " print rxn.family\n", + " print '==============='\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for familyName in reactionDict:\n", + " print 'Adding training reactions for family: ' + familyName\n", + " kineticFamily = database.kinetics.families[familyName]\n", + " trainingDatabase = None\n", + " for depository in kineticFamily.depositories:\n", + " if depository.label.endswith('training'):\n", + " trainingDatabase = depository\n", + " break\n", + " reactions = reactionDict[familyName]\n", + " print 'reactions.py previously has {} rxns. Now adding {} new rxn(s).'.format(len(trainingDatabase.entries.values()), len(reactions))\n", + " print '================='\n", + " kineticFamily.saveTrainingReactions(reactions, shortDesc='Training reaction from kinetics library: {0}'.format(libraryName))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How saveTrainingReaction works\n", + "\n", + "This part of the script is commented and should not be run. It serves only to demonstrate how the code for saving the training reactions works.\n", + "\n", + "## get speciesDict\n", + "\n", + "### load existing species as an intial speciesDict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# import os\n", + "# from rmgpy.data.base import Database\n", + "\n", + "# training_path = os.path.join(settings['database.directory'], \\\n", + "# 'kinetics', 'families', 'R_Addition_MultipleBond', 'training')\n", + "\n", + "# dictionary_file = os.path.join(training_path, 'dictionary.txt')\n", + "\n", + "# # Load the existing set of the species of the training reactions\n", + "# speciesDict = Database().getSpecies(dictionary_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### for one family check uniqueness of each species in the lib_rxns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [], + "source": [ + "# familyName = 'R_Addition_MultipleBond'\n", + "# print 'Adding training reactions for family: ' + familyName\n", + "# kineticFamily = database.kinetics.families[familyName]\n", + "# reactions = reactionDict[familyName]\n", + "\n", + "# for rxn in reactions:\n", + "# for spec in (rxn.reactants + rxn.products):\n", + "# for ex_spec_label in speciesDict:\n", + "# ex_spec = speciesDict[ex_spec_label]\n", + "# if ex_spec.molecule[0].getFormula() != spec.molecule[0].getFormula():\n", + "# continue\n", + "# else:\n", + "# spec_labeledAtoms = spec.molecule[0].getLabeledAtoms()\n", + "# ex_spec_labeledAtoms = ex_spec.molecule[0].getLabeledAtoms()\n", + "# initialMap = {}\n", + "# try:\n", + "# for atomLabel in spec_labeledAtoms:\n", + "# initialMap[spec_labeledAtoms[atomLabel]] = ex_spec_labeledAtoms[atomLabel]\n", + "# except KeyError:\n", + "# # atom labels did not match, therefore not a match\n", + "# continue\n", + "# if spec.molecule[0].isIsomorphic(ex_spec.molecule[0],initialMap):\n", + "# spec.label = ex_spec.label\n", + "# break\n", + "# else:# no isomorphic existing species found\n", + "# spec_formula = spec.molecule[0].getFormula()\n", + "# if spec_formula not in speciesDict:\n", + "# spec.label = spec_formula\n", + "# else:\n", + "# index = 2\n", + "# while (spec_formula + '-{}'.format(index)) in speciesDict:\n", + "# index += 1\n", + "# spec.label = spec_formula + '-{}'.format(index)\n", + "# speciesDict[spec.label] = spec" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## save to files\n", + "\n", + "Save reactionDict to reactions.py and speciesDict to dictionary.txt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# # try to append \n", + "# training_file = open(os.path.join(settings['database.directory'], 'kinetics', 'families', \\\n", + "# kineticFamily.label, 'training', 'reactions_test.py'), 'a')\n", + "\n", + "# training_file.write(\"\\n\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# # find the largest reaction index\n", + "# for depository in kineticFamily.depositories:\n", + "# if depository.label.endswith('training'):\n", + "# break\n", + "# else:\n", + "# logging.info('Could not find training depository in family {0}.'.format(kineticFamily.label))\n", + "# logging.info('Starting a new one')\n", + "# depository = KineticsDepository()\n", + "# kineticFamily.depositories.append(depository)\n", + "\n", + "# trainingDatabase = depository\n", + "# indices = [entry.index for entry in trainingDatabase.entries.values()]\n", + "# if indices:\n", + "# maxIndex = max(indices)\n", + "# else:\n", + "# maxIndex = 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# # save reactions.py\n", + "# from rmgpy.data.base import Entry\n", + "# for i, reaction in enumerate(reactions): \n", + "# entry = Entry(\n", + "# index = maxIndex+i+1,\n", + "# label = str(reaction),\n", + "# item = reaction,\n", + "# data = reaction.kinetics,\n", + "# reference = None,\n", + "# referenceType = '',\n", + "# shortDesc = unicode(''),\n", + "# longDesc = unicode(''),\n", + "# rank = 3,\n", + "# )\n", + "# print reaction\n", + "# kineticFamily.saveEntry(training_file, entry)\n", + "\n", + "# training_file.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# # save dictionary.txt\n", + "# directory_test_file = os.path.join(training_path, 'directory_test.txt')\n", + "# with open(directory_test_file, 'w') as f:\n", + "# for label in speciesDict.keys():\n", + "# f.write(speciesDict[label].molecule[0].toAdjacencyList(label=label, removeH=False))\n", + "# f.write('\\n')\n", + "# f.close()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 4cf074de86db7ddfc335bef7765bad5f7e91636b Mon Sep 17 00:00:00 2001 From: KEHANG Date: Sun, 21 Feb 2016 14:56:43 -0500 Subject: [PATCH 4/4] created an ipynb showing the effectiveness of kinetics improvement --- scripts/showNewTrainingRxnsImprovements.ipynb | 282 ++++++++++++++++++ 1 file changed, 282 insertions(+) create mode 100644 scripts/showNewTrainingRxnsImprovements.ipynb diff --git a/scripts/showNewTrainingRxnsImprovements.ipynb b/scripts/showNewTrainingRxnsImprovements.ipynb new file mode 100644 index 0000000000..91c49f88fe --- /dev/null +++ b/scripts/showNewTrainingRxnsImprovements.ipynb @@ -0,0 +1,282 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from rmgpy.data.rmg import RMGDatabase\n", + "from rmgpy.chemkin import saveChemkinFile, saveSpeciesDictionary\n", + "from rmgpy.rmg.model import Species, getFamilyLibraryObject, CoreEdgeReactionModel\n", + "from rmgpy import settings\n", + "from convertKineticsLibraryToTrainingReactions import addAtomLabelsForReaction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "database = RMGDatabase()\n", + "libraries = ['C3']\n", + "database.load(settings['database.directory'], kineticsFamilies='all', reactionLibraries = libraries, kineticsDepositories='all')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## step1: find fam_rxn for each lib_rxn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "reactionDict = {}\n", + "for libraryName in libraries:\n", + " kineticLibrary = database.kinetics.libraries[libraryName]\n", + " for index, entry in kineticLibrary.entries.iteritems():\n", + " lib_rxn = entry.item\n", + " lib_rxn.kinetics = entry.data \n", + " lib_rxn.index = entry.index\n", + " # Let's make RMG generate this reaction from the families!\n", + " fam_rxn_list = []\n", + " rxt_mol_mutation_num = 1\n", + " pdt_mol_mutation_num = 1\n", + " for reactant in lib_rxn.reactants:\n", + " rxt_mol_mutation_num *= len(reactant.molecule)\n", + "\n", + " for product in lib_rxn.products:\n", + " pdt_mol_mutation_num *= len(product.molecule)\n", + "\n", + " for mutation_i in range(rxt_mol_mutation_num):\n", + " rxts_mol = [spc.molecule[mutation_i%(len(spc.molecule))] for spc in lib_rxn.reactants]\n", + " pdts_mol = [spc.molecule[0] for spc in lib_rxn.products]\n", + " fam_rxn_list.extend(database.kinetics.generateReactionsFromFamilies(\n", + " reactants=rxts_mol, products=pdts_mol))\n", + "\n", + " if len(fam_rxn_list) == 1:\n", + " fam_rxn = fam_rxn_list[0] \n", + " lib_reactants = [r for r in lib_rxn.reactants] \n", + " fam_reactants = [r for r in fam_rxn.reactants]\n", + " for lib_reactant in lib_reactants:\n", + " for fam_reactant in fam_reactants:\n", + " if lib_reactant.isIsomorphic(fam_reactant):\n", + " fam_reactants.remove(fam_reactant)\n", + " break\n", + "\n", + " lib_products = [r for r in lib_rxn.products] \n", + " fam_products = [r for r in fam_rxn.products]\n", + " for lib_product in lib_products:\n", + " for fam_product in fam_products:\n", + " if lib_product.isIsomorphic(fam_product):\n", + " fam_products.remove(fam_product)\n", + " break\n", + "\n", + " forward = not (len(fam_reactants) != 0 or len(fam_products) != 0)\n", + " # find the labeled atoms using family and reactants & products from fam_rxn \n", + " addAtomLabelsForReaction(fam_rxn, database)\n", + " fam_rxn.index = lib_rxn.index\n", + " reactionDict[fam_rxn.family] = [fam_rxn]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from IPython.display import display\n", + "for fam_rxn in reactionDict['Intra_R_Add_Endocyclic']:\n", + " print fam_rxn.index\n", + " display(fam_rxn)\n", + " for spec in fam_rxn.reactants + fam_rxn.products:\n", + " print spec.molecule[0].toSMILES()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for index, entry in kineticLibrary.entries.iteritems():\n", + " if entry.index == fam_rxn.index:\n", + " lib_rxn = entry.item\n", + " lib_rxn.kinetics = entry.data \n", + " lib_rxn.index = entry.index\n", + " break\n", + "lib_rxn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "id(fam_rxn.reactants[0].molecule[0]), id(lib_rxn.reactants[0].molecule[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## step2: get fam_rxn's kinetics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before training RMG estimates fam_rxn's kinetics as $ A = 10^9, n = 0.19, E_a = 83.68 kJ/mol $ at [here](http://rmg.mit.edu/database/kinetics/families/Intra_R_Add_Endocyclic/rate_rules/reactant1=multiplicity%25202%250A1%2520%2520C%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%2520%257B3%252CS%257D%2520%257B8%252CS%257D%2520%257B9%252CS%257D%250A2%2520%2520C%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%2520%257B4%252CS%257D%2520%257B5%252CS%257D%2520%257B6%252CS%257D%250A3%2520%2520C%2520u1%2520p0%2520c0%2520%257B1%252CS%257D%2520%257B7%252CD%257D%250A4%2520%2520C%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%2520%257B10%252CT%257D%250A5%2520%2520H%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%250A6%2520%2520H%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%250A7%2520%2520C%2520u0%2520p0%2520c0%2520%257B3%252CD%257D%2520%257B11%252CS%257D%2520%257B12%252CS%257D%250A8%2520%2520H%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%250A9%2520%2520H%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%250A10%2520C%2520u0%2520p0%2520c0%2520%257B4%252CT%257D%2520%257B13%252CS%257D%250A11%2520H%2520u0%2520p0%2520c0%2520%257B7%252CS%257D%250A12%2520H%2520u0%2520p0%2520c0%2520%257B7%252CS%257D%250A13%2520H%2520u0%2520p0%2520c0%2520%257B10%252CS%257D%250A__product1=multiplicity%25202%250A1%2520%2520C%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%2520%257B3%252CS%257D%2520%257B7%252CS%257D%2520%257B8%252CS%257D%250A2%2520%2520C%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%2520%257B4%252CS%257D%2520%257B9%252CS%257D%2520%257B10%252CS%257D%250A3%2520%2520C%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%2520%257B5%252CS%257D%2520%257B6%252CD%257D%250A4%2520%2520C%2520u1%2520p0%2520c0%2520%257B2%252CS%257D%2520%257B5%252CD%257D%250A5%2520%2520C%2520u0%2520p0%2520c0%2520%257B3%252CS%257D%2520%257B4%252CD%257D%2520%257B11%252CS%257D%250A6%2520%2520C%2520u0%2520p0%2520c0%2520%257B3%252CD%257D%2520%257B12%252CS%257D%2520%257B13%252CS%257D%250A7%2520%2520H%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%250A8%2520%2520H%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%250A9%2520%2520H%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%250A10%2520H%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%250A11%2520H%2520u0%2520p0%2520c0%2520%257B5%252CS%257D%250A12%2520H%2520u0%2520p0%2520c0%2520%257B6%252CS%257D%250A13%2520H%2520u0%2520p0%2520c0%2520%257B6%252CS%257D%250A)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## step3: after training get fam_rxn's kinetics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "cem = CoreEdgeReactionModel()\n", + "cem.kineticsEstimator = 'rate rules'\n", + "cem.verboseComments = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from rmgpy.kinetics.kineticsdata import KineticsData\n", + "from rmgpy.data.kinetics.family import TemplateReaction\n", + "from rmgpy.data.kinetics.depository import DepositoryReaction\n", + "\n", + "for idx, spec in enumerate(fam_rxn.reactants):\n", + " spec = Species(label=spec.label, molecule=spec.molecule)\n", + " spec.generateThermoData(database)\n", + " fam_rxn.reactants[idx] = spec\n", + "for idx, spec in enumerate(fam_rxn.products):\n", + " spec = Species(label=spec.label, molecule=spec.molecule)\n", + " spec.generateThermoData(database)\n", + " fam_rxn.products[idx] = spec\n", + "\n", + "family = getFamilyLibraryObject(fam_rxn.family)\n", + "\n", + "# If the reaction already has kinetics (e.g. from a library),\n", + "# assume the kinetics are satisfactory\n", + "if fam_rxn.kinetics is None:\n", + " # Set the reaction kinetics\n", + " kinetics, source, entry, isForward = cem.generateKinetics(fam_rxn)\n", + " fam_rxn.kinetics = kinetics\n", + " # Flip the reaction direction if the kinetics are defined in the reverse direction\n", + " if not isForward:\n", + " fam_rxn.reactants, fam_rxn.products = fam_rxn.products, fam_rxn.reactants\n", + " fam_rxn.pairs = [(p,r) for r,p in fam_rxn.pairs]\n", + " if family.ownReverse and hasattr(fam_rxn,'reverse'):\n", + " if not isForward:\n", + " fam_rxn.template = fam_rxn.reverse.template\n", + " # We're done with the \"reverse\" attribute, so delete it to save a bit of memory\n", + " delattr(fam_rxn,'reverse')\n", + "\n", + "# convert KineticsData to Arrhenius forms\n", + "if isinstance(fam_rxn.kinetics, KineticsData):\n", + " fam_rxn.kinetics = fam_rxn.kinetics.toArrhenius()\n", + "# correct barrier heights of estimated kinetics\n", + "if isinstance(fam_rxn,TemplateReaction) or isinstance(fam_rxn,DepositoryReaction): # i.e. not LibraryReaction\n", + " fam_rxn.fixBarrierHeight() # also converts ArrheniusEP to Arrhenius.\n", + "\n", + "if cem.pressureDependence and fam_rxn.isUnimolecular():\n", + " # If this is going to be run through pressure dependence code,\n", + " # we need to make sure the barrier is positive.\n", + " fam_rxn.fixBarrierHeight(forcePositive=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fam_rxn.kinetics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## step4: compare with lib_rxn's kinetics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "lib_rxn.kinetics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion: it improves the kinetics by factor of 10,000 at 673 for this reaction" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}