nlesc-nano · BvB93 · May 27, 2020 · May 28, 2020 · May 27, 2020 · May 28, 2020
diff --git a/README.rst b/README.rst
@@ -12,7 +12,7 @@
 
 
 ##############
-Nano-CAT 0.6.1
+Nano-CAT 0.7.0
 ##############
 
 **Nano-CAT** is a collection of tools for the analysis of nanocrystals,

diff --git a/nanoCAT/__version__.py b/nanoCAT/__version__.py
@@ -1 +1 @@
-__version__ = '0.6.1'
+__version__ = '0.7.0'
diff --git a/nanoCAT/asa/asa.py b/nanoCAT/asa/asa.py
@@ -53,18 +53,25 @@ def init_asa(qd_df: SettingsDataFrame) -> None:
 
     """
     workflow = WorkFlow.from_template(qd_df, name='asa')
+    columns = workflow.import_columns.keys()
+    for i in columns:
+        qd_df[i] = 0.0
 
     # Run the activation strain workflow
-    idx = workflow.from_db(qd_df)
+    df_bool = workflow.from_db(qd_df, 'ASA')
+    columns_subset = columns - df_bool.columns
+    for i in columns_subset:
+        df_bool[i] = True
+
+    idx = df_bool['ASA'].all(axis=1)
     if workflow.md:
         workflow(get_asa_md, qd_df, index=idx)
     else:
         workflow(get_asa_energy, qd_df, index=idx)
 
     # Prepare for results exporting
     qd_df[JOB_SETTINGS_ASA] = workflow.pop_job_settings(qd_df[MOL])
-    job_recipe = workflow.get_recipe()
-    workflow.to_db(qd_df, index=idx, job_recipe=job_recipe)
+    workflow.to_db(qd_df, df_bool, columns=workflow.export_columns)
 
 
 def get_asa_energy(mol_list: Iterable[Molecule],

diff --git a/nanoCAT/bde/bde_workflow.py b/nanoCAT/bde/bde_workflow.py
@@ -32,6 +32,7 @@
 from itertools import product
 
 import numpy as np
+import pandas as pd
 
 from scm.plams import AMSJob, Molecule, Settings, Cp2kJob
 from scm.plams.core.basejob import Job
@@ -45,6 +46,12 @@
 from .dissociate_xyn import dissociate_ligand
 from ..qd_opt_ff import qd_opt_ff
 
+try:
+    import h5py
+    _LABEL_DTYPE = h5py.string_dtype(encoding='ascii')  # dtype for the 'label' columns
+except ImportError:  # h5py unavailable: fall back to a generic object dtype
+    _LABEL_DTYPE = np.dtype(object)
+
 __all__ = ['init_bde']
 
 
@@ -55,10 +62,17 @@ def init_bde(qd_df: SettingsDataFrame) -> None:
 
     # Create columns
     columns = _construct_columns(workflow, qd_df[MOL])
-    import_columns = {(i, j): (np.nan if i != 'label' else None) for i, j in columns}
+    columns.names = qd_df.columns.names
+    for i, j in columns:
+        if i == 'label':
+            qd_df[i, j] = np.array('', dtype=_LABEL_DTYPE).take(0)
+        else:
+            qd_df[i, j] = 0.0
 
     # Pull from the database; push unoptimized structures
-    idx = workflow.from_db(qd_df, columns=import_columns)
+    df_bool = workflow.from_db(qd_df, columns=columns.levels[0])
+
+    idx = df_bool[columns].all()  # NOTE(review): axis=0 by default; axis=1 intended?
     workflow(start_bde, qd_df, columns=columns, index=idx, workflow=workflow)
 
     # Convert the datatype from object back to float
@@ -72,11 +86,10 @@ def init_bde(qd_df: SettingsDataFrame) -> None:
     qd_df[JOB_SETTINGS_BDE] = workflow.pop_job_settings(qd_df[MOL])
 
     # Push the optimized structures to the database
-    job_recipe = workflow.get_recipe()
-    workflow.to_db(qd_df, index=idx, columns=columns, job_recipe=job_recipe)
+    workflow.to_db(qd_df, df_bool, columns=columns)
 
 
-def _construct_columns(workflow: WorkFlow, mol_list: Iterable[Molecule]) -> List[Tuple[str, str]]:
+def _construct_columns(workflow: WorkFlow, mol_list: Iterable[Molecule]) -> pd.MultiIndex:
     """Construct BDE columns for :func:`init_bde`."""
     if workflow.core_index:
         stop = len(workflow.core_index)
@@ -94,8 +107,8 @@ def _construct_columns(workflow: WorkFlow, mol_list: Iterable[Molecule]) -> List
     if workflow.jobs[1]:  # i.e. thermochemical corrections are enabled
         super_keys += ('BDE ddG', 'BDE dG')
 
-    sub_keys = np.arange(stop).astype(dtype=str)
-    return list(product(super_keys, sub_keys))
+    sub_keys = np.arange(stop)
+    return pd.MultiIndex.from_product((super_keys, sub_keys))
 
 
 def start_bde(mol_list: Iterable[Molecule],

diff --git a/nanoCAT/bde/dissociate_xyn.py b/nanoCAT/bde/dissociate_xyn.py
@@ -32,8 +32,8 @@
 
 from itertools import chain, combinations
 from typing import (
-    Union, Mapping, Iterable, Tuple, Dict, List, Optional, FrozenSet, Generator,
-    Any, TypeVar, SupportsInt, Set, Collection
+    Union, Mapping, Iterable, Tuple, Dict, List, Optional, Generator,
+    Any, TypeVar, SupportsInt, Set, Collection, FrozenSet, cast
 )
 
 import numpy as np
@@ -46,7 +46,7 @@
 from CAT.utils import iter_repeat
 from CAT.mol_utils import to_atnum
 from CAT.attachment.ligand_anchoring import _smiles_to_rdmol
-from nanoutils import group_by_values
+from nanoutils import group_by_values, as_nd_array
 
 from .guess_core_dist import guess_core_core_dist
 from .identify_surface import identify_surface
@@ -192,7 +192,7 @@ def dissociate_ligand(mol: Molecule,
 
 def _lig_mapping(mol: Molecule, idx: Iterable[int]) -> IdxMapping:
     """Map **idx** to all atoms with the same residue number."""
-    idx = as_array(idx, dtype=int)  # 1-based indices
+    idx = as_nd_array(idx, dtype=int)  # 1-based indices
 
     iterator = ((i, at.properties.pdb_info.get('ResidueNumber', i)) for i, at in enumerate(mol, 1))
     lig_mapping = group_by_values(iterator)
@@ -203,7 +203,7 @@ def _lig_mapping(mol: Molecule, idx: Iterable[int]) -> IdxMapping:
 
 def _core_mapping(mol: Molecule, idx: Iterable[int], smiles: str) -> IdxMapping:
     """Map **idx** to all atoms part of the same substructure (see **smiles**)."""
-    idx = as_array(idx, dtype=int)  # 1-based indices
+    idx = as_nd_array(idx, dtype=int)  # 1-based indices
 
     rdmol = molkit.to_rdmol(mol)
     rd_smiles = _smiles_to_rdmol(smiles)
@@ -222,7 +222,7 @@ def _core_mapping(mol: Molecule, idx: Iterable[int], smiles: str) -> IdxMapping:
 class DummyGetter:
     """A mapping placeholder; calling `__getitem__` will return the supplied key embedded within a tuple."""  # noqa
 
-    def __getitem__(self, key: SupportsInt) -> Tuple[int]: return (key,)
+    def __getitem__(self, key: T) -> Tuple[T]: return (key,)
 
 
 _DUMMY_GETTER = DummyGetter()
@@ -297,7 +297,7 @@ def core_idx(self) -> np.ndarray: return self._core_idx
 
     @core_idx.setter
     def core_idx(self, value: Union[int, Iterable[int]]) -> None:
-        self._core_idx = core_idx = as_array(value, dtype=int, ndmin=1, copy=True)
+        self._core_idx = core_idx = as_nd_array(value, dtype=int, ndmin=1, copy=True)
         core_idx -= 1
         core_idx.sort()
 
@@ -319,7 +319,7 @@ def topology(self) -> Mapping[int, str]: return self._topology
     def topology(self, value: Optional[Mapping[int, str]]) -> None:
         self._topology = value or {}
 
-    _PRIVATE_ATTR: FrozenSet[str] = frozenset({'_coords'})
+    _PRIVATE_ATTR: Set[str] = frozenset({'_coords'})  # type: ignore
 
     def __init__(self, mol: Molecule,
                  core_idx: Union[int, Iterable[int]],
@@ -462,7 +462,7 @@ def get_pairs_closest(self, lig_idx: Union[int, Iterable[int]],
         # Extract instance variables
         xyz: np.ndarray = self._coords
         i: np.ndarray = self.core_idx
-        j: np.ndarray = as_array(lig_idx, dtype=int) - 1
+        j: np.ndarray = as_nd_array(lig_idx, dtype=int) - 1
         n: int = self.ligand_count
 
         # Find all core atoms within a radius **max_dist** from a ligand
@@ -516,7 +516,7 @@ def get_pairs_distance(self, lig_idx: Union[int, Iterable[int]],
         # Extract instance variables
         xyz: np.ndarray = self._coords
         i: np.ndarray = self.core_idx
-        j: np.ndarray = as_array(lig_idx, dtype=int) - 1
+        j: np.ndarray = as_nd_array(lig_idx, dtype=int) - 1
         n: int = self.ligand_count
 
         # Find all core atoms within a radius **max_dist** from a ligand
@@ -655,20 +655,3 @@ def _get_new_indices(self, core_is_lig: bool = False) -> List[int]:
         for i in ret:
             i -= 1
         return ret
-
-
-def as_array(iterable: Iterable, dtype: Union[None, str, type, np.dtype] = None,
-             copy: bool = False, ndmin: int = 0) -> np.ndarray:
-    """Convert a generic iterable (including iterators) into a NumPy array.
-
-    See :func:`numpy.array` for an extensive description of all parameters.
-
-    """
-    try:
-        ret = np.array(iterable, dtype=dtype, copy=copy)
-    except TypeError:  # **iterable** is an iterator
-        ret = np.fromiter(iterable, dtype=dtype)
-
-    if ret.ndim < ndmin:
-        ret.shape += (1,) * (ndmin - ret.ndim)
-    return ret
diff --git a/nanoCAT/cdft.py b/nanoCAT/cdft.py
@@ -8,7 +8,7 @@
 from qmflows.packages.SCM import ADF_Result
 from scm.plams import Molecule, Settings, ADFJob, ADFResults, Units, Results
 from scm.plams.core.basejob import Job
-from CAT.workflows import WorkFlow, JOB_SETTINGS_CDFT, MOL
+from CAT.workflows import WorkFlow, JOB_SETTINGS_CDFT, MOL, CDFT_CHI
 from CAT.jobs import job_single_point
 from CAT.settings_dataframe import SettingsDataFrame
 
@@ -34,6 +34,8 @@
 cdft.specific.adf = _templates.singlepoint.specific.adf.copy()
 cdft += Settings(yaml.safe_load(_CDFT))
 
+CDFT = CDFT_CHI[0]
+
 
 def init_cdft(ligand_df: SettingsDataFrame) -> None:
     r"""Initialize the ligand conceptual dft (CDFT) workflow.
@@ -45,18 +47,24 @@ def init_cdft(ligand_df: SettingsDataFrame) -> None:
 
     """
     workflow = WorkFlow.from_template(ligand_df, name='cdft')
+    for k, v in workflow.import_columns.items():
+        ligand_df[k] = v
 
     # Import from the database and start the calculation
-    idx = workflow.from_db(ligand_df)
+    df_bool = workflow.from_db(ligand_df, CDFT)
+    column_subset = workflow.import_columns.keys() - df_bool.columns
+    for k in column_subset:
+        df_bool[k] = True
+
+    idx = df_bool[CDFT].any(axis=1)
     workflow(start_crs_jobs, ligand_df, index=idx)
 
     # Sets a nested list with the filenames of .in files
     # This cannot be done with loc is it will try to expand the list into a 2D array
     ligand_df[JOB_SETTINGS_CDFT] = workflow.pop_job_settings(ligand_df[MOL])
 
     # Export to the database
-    job_recipe = workflow.get_recipe()
-    workflow.to_db(ligand_df, index=idx, job_recipe=job_recipe)
+    workflow.to_db(ligand_df, df_bool, columns=workflow.export_columns)
 
 
 def start_crs_jobs(mol_list: Iterable[Molecule],

diff --git a/nanoCAT/ligand_solvation.py b/nanoCAT/ligand_solvation.py
@@ -40,6 +40,7 @@
 from typing import Optional, Sequence, Collection, Tuple, List, Iterable, Any, Type, Iterator
 
 import numpy as np
+import pandas as pd
 
 from scm.plams import Settings, Molecule, Results, CRSJob, CRSResults, JobRunner, ADFJob
 from scm.plams.core.basejob import Job
@@ -77,20 +78,22 @@ def init_solv(ligand_df: SettingsDataFrame,
     # Create column slices
     solvent_list = get_solvent_list(solvent_list)
     columns = get_solvent_columns(solvent_list)
-
-    # Create new import and export columns
-    import_columns = {k: np.nan for k in columns}
-    import_columns.update(workflow.import_columns)
-    export_columns = columns + list(workflow.import_columns)
+    for i in columns:
+        ligand_df[i] = 0.0
 
     # Create index slices and run the workflow
-    idx = workflow.from_db(ligand_df, columns=import_columns)
+    df_bool = workflow.from_db(ligand_df, *columns.levels[0].values)
+    column_subset = columns.difference(df_bool.columns)
+    for i in column_subset:
+        df_bool[i] = True
+
+    idx = df_bool[columns].any(axis=1)
     workflow(start_crs_jobs, ligand_df, index=idx, columns=columns, solvent_list=solvent_list)
 
     # Export results back to the database
-    job_recipe = workflow.get_recipe()
     ligand_df[JOB_SETTINGS_CRS] = workflow.pop_job_settings(ligand_df[MOL])
-    workflow.to_db(ligand_df, index=idx, columns=export_columns, job_recipe=job_recipe)
+    export_columns = columns.append(pd.Index([JOB_SETTINGS_CRS]))
+    workflow.to_db(ligand_df, df_bool, columns=export_columns)
 
 
 def start_crs_jobs(mol_list: Iterable[Molecule],
@@ -116,7 +119,7 @@ def start_crs_jobs(mol_list: Iterable[Molecule],
     return ret
 
 
-def get_solvent_columns(solvent_list: Iterable[str]) -> List[Tuple[str, str]]:
+def get_solvent_columns(solvent_list: Iterable[str]) -> pd.MultiIndex:
     """Create a list of column names from an iterable containing .coskf names.
 
     Parameters
@@ -134,7 +137,8 @@ def get_solvent_columns(solvent_list: Iterable[str]) -> List[Tuple[str, str]]:
     """
     # Use filenames without extensions are absolute paths
     clm_tups = [os.path.basename(i).rsplit('.', maxsplit=1)[0] for i in solvent_list]
-    return list(product(('E_solv', 'gamma'), clm_tups))
+    super_keys = ('E_solv', 'gamma')
+    return pd.MultiIndex.from_product((super_keys, clm_tups))
 
 
 def get_solvent_list(solvent_list: Optional[Sequence[str]] = None) -> Sequence[str]:

diff --git a/nanoCAT/mol_bulk.py b/nanoCAT/mol_bulk.py
@@ -33,7 +33,7 @@
 from scm.plams import Molecule
 
 from CAT.settings_dataframe import SettingsDataFrame
-from CAT.workflows import WorkFlow, MOL
+from CAT.workflows import WorkFlow, MOL, V_BULK
 
 __all__ = ['init_lig_bulkiness']
 
@@ -70,14 +70,17 @@ def init_lig_bulkiness(qd_df: SettingsDataFrame, ligand_df: SettingsDataFrame,
     """
     workflow = WorkFlow.from_template(qd_df, name='bulkiness')
     workflow.keep_files = False
+    qd_df[V_BULK] = 0.0
 
     # Import from the database and start the calculation
-    idx = workflow.from_db(qd_df)
-    workflow(start_lig_bulkiness, qd_df, index=idx,
+    df_bool = workflow.from_db(qd_df, V_BULK[0])
+    if V_BULK not in df_bool.columns:
+        df_bool[V_BULK] = True
+    workflow(start_lig_bulkiness, qd_df, index=df_bool[V_BULK],
              lig_series=ligand_df[MOL], core_series=core_df[MOL])
 
     # Export to the database
-    workflow.to_db(qd_df, index=idx)
+    workflow.to_db(qd_df, df_bool, columns=workflow.export_columns)
 
 
 def start_lig_bulkiness(qd_series: pd.Series, lig_series: pd.Series, core_series: pd.Series,

diff --git a/setup.py b/setup.py
@@ -30,7 +30,7 @@
         'nanoCAT.recipes'
     ],
     package_dir={'nanoCAT': 'nanoCAT'},
-    package_data={'nanoCAT': ['data/*csv', 'py.typed', '*.pyi']},
+    package_data={'nanoCAT': ['py.typed', '*.pyi']},
     include_package_data=True,
     license='GNU Lesser General Public License v3 or later',
     zip_safe=False,
@@ -80,7 +80,6 @@
         'test': ['pytest',
                  'pytest-cov',
                  'pytest-mock',
-                 'pycodestyle'],
-        'doc': ['sphinx', 'sphinx_rtd_theme', 'sphinx-autodoc-typehints']
+                 'pycodestyle']
     }
 )