created an ipynb showing the effectiveness of kinetics improvement

KEHANG · connie · commit 4cf074de86db · 2016-02-28T22:15:06.000-05:00
diff --git a/scripts/showNewTrainingRxnsImprovements.ipynb b/scripts/showNewTrainingRxnsImprovements.ipynb
@@ -0,0 +1,282 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from rmgpy.data.rmg import RMGDatabase\n",
+    "from rmgpy.chemkin import saveChemkinFile, saveSpeciesDictionary\n",
+    "from rmgpy.rmg.model import Species, getFamilyLibraryObject, CoreEdgeReactionModel\n",
+    "from rmgpy import settings\n",
+    "from convertKineticsLibraryToTrainingReactions import addAtomLabelsForReaction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "database = RMGDatabase()\n",
+    "libraries = ['C3']\n",
+    "database.load(settings['database.directory'], kineticsFamilies='all', reactionLibraries = libraries, kineticsDepositories='all')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## step1: find fam_rxn for each lib_rxn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "reactionDict = {}\n",
+    "for libraryName in libraries:\n",
+    "    kineticLibrary = database.kinetics.libraries[libraryName]\n",
+    "    for index, entry in kineticLibrary.entries.iteritems():\n",
+    "        lib_rxn = entry.item\n",
+    "        lib_rxn.kinetics = entry.data \n",
+    "        lib_rxn.index = entry.index\n",
+    "        # Let's make RMG generate this reaction from the families!\n",
+    "        fam_rxn_list = []\n",
+    "        rxt_mol_mutation_num = 1\n",
+    "        pdt_mol_mutation_num = 1\n",
+    "        for reactant in lib_rxn.reactants:\n",
+    "            rxt_mol_mutation_num *= len(reactant.molecule)\n",
+    "\n",
+    "        for product in lib_rxn.products:\n",
+    "            pdt_mol_mutation_num *= len(product.molecule)\n",
+    "\n",
+    "        for mutation_i in range(rxt_mol_mutation_num):\n",
+    "            rxts_mol = [spc.molecule[mutation_i%(len(spc.molecule))] for spc in lib_rxn.reactants]\n",
+    "            pdts_mol = [spc.molecule[0] for spc in lib_rxn.products]\n",
+    "            fam_rxn_list.extend(database.kinetics.generateReactionsFromFamilies(\n",
+    "                        reactants=rxts_mol, products=pdts_mol))\n",
+    "\n",
+    "        if len(fam_rxn_list) == 1:\n",
+    "            fam_rxn = fam_rxn_list[0]       \n",
+    "            lib_reactants = [r for r in lib_rxn.reactants]        \n",
+    "            fam_reactants = [r for r in fam_rxn.reactants]\n",
+    "            for lib_reactant in lib_reactants:\n",
+    "                for fam_reactant in fam_reactants:\n",
+    "                    if lib_reactant.isIsomorphic(fam_reactant):\n",
+    "                        fam_reactants.remove(fam_reactant)\n",
+    "                        break\n",
+    "\n",
+    "            lib_products = [r for r in lib_rxn.products]        \n",
+    "            fam_products = [r for r in fam_rxn.products]\n",
+    "            for lib_product in lib_products:\n",
+    "                for fam_product in fam_products:\n",
+    "                    if lib_product.isIsomorphic(fam_product):\n",
+    "                        fam_products.remove(fam_product)\n",
+    "                        break\n",
+    "\n",
+    "            forward = not (len(fam_reactants) != 0 or len(fam_products) != 0)\n",
+    "            # find the labeled atoms using family and reactants & products from fam_rxn           \n",
+    "            addAtomLabelsForReaction(fam_rxn, database)\n",
+    "            fam_rxn.index = lib_rxn.index\n",
+    "            reactionDict[fam_rxn.family] = [fam_rxn]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.display import display\n",
+    "for fam_rxn in reactionDict['Intra_R_Add_Endocyclic']:\n",
+    "    print fam_rxn.index\n",
+    "    display(fam_rxn)\n",
+    "    for spec in fam_rxn.reactants + fam_rxn.products:\n",
+    "        print spec.molecule[0].toSMILES()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "for index, entry in kineticLibrary.entries.iteritems():\n",
+    "    if entry.index == fam_rxn.index:\n",
+    "        lib_rxn = entry.item\n",
+    "        lib_rxn.kinetics = entry.data \n",
+    "        lib_rxn.index = entry.index\n",
+    "        break\n",
+    "lib_rxn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "id(fam_rxn.reactants[0].molecule[0]), id(lib_rxn.reactants[0].molecule[0])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## step2: get fam_rxn's kinetics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before training RMG estimates fam_rxn's kinetics as $ A = 10^9, n = 0.19, E_a = 83.68 kJ/mol $ at [here](http://rmg.mit.edu/database/kinetics/families/Intra_R_Add_Endocyclic/rate_rules/reactant1=multiplicity%25202%250A1%2520%2520C%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%2520%257B3%252CS%257D%2520%257B8%252CS%257D%2520%257B9%252CS%257D%250A2%2520%2520C%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%2520%257B4%252CS%257D%2520%257B5%252CS%257D%2520%257B6%252CS%257D%250A3%2520%2520C%2520u1%2520p0%2520c0%2520%257B1%252CS%257D%2520%257B7%252CD%257D%250A4%2520%2520C%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%2520%257B10%252CT%257D%250A5%2520%2520H%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%250A6%2520%2520H%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%250A7%2520%2520C%2520u0%2520p0%2520c0%2520%257B3%252CD%257D%2520%257B11%252CS%257D%2520%257B12%252CS%257D%250A8%2520%2520H%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%250A9%2520%2520H%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%250A10%2520C%2520u0%2520p0%2520c0%2520%257B4%252CT%257D%2520%257B13%252CS%257D%250A11%2520H%2520u0%2520p0%2520c0%2520%257B7%252CS%257D%250A12%2520H%2520u0%2520p0%2520c0%2520%257B7%252CS%257D%250A13%2520H%2520u0%2520p0%2520c0%2520%257B10%252CS%257D%250A__product1=multiplicity%25202%250A1%2520%2520C%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%2520%257B3%252CS%257D%2520%257B7%252CS%257D%2520%257B8%252CS%257D%250A2%2520%2520C%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%2520%257B4%252CS%257D%2520%257B9%252CS%257D%2520%257B10%252CS%257D%250A3%2520%2520C%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%2520%257B5%252CS%257D%2520%257B6%252CD%257D%250A4%2520%2520C%2520u1%2520p0%2520c0%2520%257B2%252CS%257D%2520%257B5%252CD%257D%250A5%2520%2520C%2520u0%2520p0%2520c0%2520%257B3%252CS%257D%2520%257B4%252CD%257D%2520%257B11%252CS%257D%250A6%2520%2520C%2520u0%2520p0%2520c0%2520%257B3%252CD%257D%2520%257B12%252CS%257D%2520%257B13%252CS%257D%250A7%2520%2520H%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%250A8%2520%2520H%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%250A9%2520%2520H%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%250A10%2520H%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%250A11%2520H%2520u0%2520p0%2520c0%2520%257B5%252CS%257D%250A12%2520H%2520u0%2520p0%2520c0%2520%257B6%252CS%257D%250A13%2520H%2520u0%2520p0%2520c0%2520%257B6%252CS%257D%250A)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## step3: after training get fam_rxn's kinetics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "cem = CoreEdgeReactionModel()\n",
+    "cem.kineticsEstimator = 'rate rules'\n",
+    "cem.verboseComments = True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from rmgpy.kinetics.kineticsdata import KineticsData\n",
+    "from rmgpy.data.kinetics.family import TemplateReaction\n",
+    "from rmgpy.data.kinetics.depository import DepositoryReaction\n",
+    "\n",
+    "for idx, spec in enumerate(fam_rxn.reactants):\n",
+    "    spec = Species(label=spec.label, molecule=spec.molecule)\n",
+    "    spec.generateThermoData(database)\n",
+    "    fam_rxn.reactants[idx] = spec\n",
+    "for idx, spec in enumerate(fam_rxn.products):\n",
+    "    spec = Species(label=spec.label, molecule=spec.molecule)\n",
+    "    spec.generateThermoData(database)\n",
+    "    fam_rxn.products[idx] = spec\n",
+    "\n",
+    "family = getFamilyLibraryObject(fam_rxn.family)\n",
+    "\n",
+    "# If the reaction already has kinetics (e.g. from a library),\n",
+    "# assume the kinetics are satisfactory\n",
+    "if fam_rxn.kinetics is None:\n",
+    "    # Set the reaction kinetics\n",
+    "    kinetics, source, entry, isForward = cem.generateKinetics(fam_rxn)\n",
+    "    fam_rxn.kinetics = kinetics\n",
+    "    # Flip the reaction direction if the kinetics are defined in the reverse direction\n",
+    "    if not isForward:\n",
+    "        fam_rxn.reactants, fam_rxn.products = fam_rxn.products, fam_rxn.reactants\n",
+    "        fam_rxn.pairs = [(p,r) for r,p in fam_rxn.pairs]\n",
+    "    if family.ownReverse and hasattr(fam_rxn,'reverse'):\n",
+    "        if not isForward:\n",
+    "            fam_rxn.template = fam_rxn.reverse.template\n",
+    "        # We're done with the \"reverse\" attribute, so delete it to save a bit of memory\n",
+    "        delattr(fam_rxn,'reverse')\n",
+    "\n",
+    "# convert KineticsData to Arrhenius forms\n",
+    "if isinstance(fam_rxn.kinetics, KineticsData):\n",
+    "    fam_rxn.kinetics = fam_rxn.kinetics.toArrhenius()\n",
+    "#  correct barrier heights of estimated kinetics\n",
+    "if isinstance(fam_rxn,TemplateReaction) or isinstance(fam_rxn,DepositoryReaction): # i.e. not LibraryReaction\n",
+    "    fam_rxn.fixBarrierHeight() # also converts ArrheniusEP to Arrhenius.\n",
+    "\n",
+    "if cem.pressureDependence and fam_rxn.isUnimolecular():\n",
+    "    # If this is going to be run through pressure dependence code,\n",
+    "    # we need to make sure the barrier is positive.\n",
+    "    fam_rxn.fixBarrierHeight(forcePositive=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "fam_rxn.kinetics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## step4: compare with lib_rxn's kinetics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "lib_rxn.kinetics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Conclusion: it improves the kinetics by factor of 10,000 at 673 for this reaction"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}