diff --git a/README.rst b/README.rst index 27e66fe..5a001de 100644 --- a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ ############## -Nano-CAT 0.6.1 +Nano-CAT 0.7.0 ############## **Nano-CAT** is a collection of tools for the analysis of nanocrystals, diff --git a/nanoCAT/__version__.py b/nanoCAT/__version__.py index 8411e55..a71c5c7 100644 --- a/nanoCAT/__version__.py +++ b/nanoCAT/__version__.py @@ -1 +1 @@ -__version__ = '0.6.1' +__version__ = '0.7.0' diff --git a/nanoCAT/asa/asa.py b/nanoCAT/asa/asa.py index 8e57577..11c7712 100644 --- a/nanoCAT/asa/asa.py +++ b/nanoCAT/asa/asa.py @@ -53,9 +53,17 @@ def init_asa(qd_df: SettingsDataFrame) -> None: """ workflow = WorkFlow.from_template(qd_df, name='asa') + columns = workflow.import_columns.keys() + for i in columns: + qd_df[i] = 0.0 # Run the activation strain workflow - idx = workflow.from_db(qd_df) + df_bool = workflow.from_db(qd_df, 'ASA') + columns_subset = columns - df_bool.columns + for i in columns_subset: + df_bool[i] = True + + idx = df_bool['ASA'].all(axis=1) if workflow.md: workflow(get_asa_md, qd_df, index=idx) else: @@ -63,8 +71,7 @@ def init_asa(qd_df: SettingsDataFrame) -> None: # Prepare for results exporting qd_df[JOB_SETTINGS_ASA] = workflow.pop_job_settings(qd_df[MOL]) - job_recipe = workflow.get_recipe() - workflow.to_db(qd_df, index=idx, job_recipe=job_recipe) + workflow.to_db(qd_df, df_bool, columns=workflow.export_columns) def get_asa_energy(mol_list: Iterable[Molecule], diff --git a/nanoCAT/bde/bde_workflow.py b/nanoCAT/bde/bde_workflow.py index 1f0c4f5..aeea499 100644 --- a/nanoCAT/bde/bde_workflow.py +++ b/nanoCAT/bde/bde_workflow.py @@ -32,6 +32,7 @@ from itertools import product import numpy as np +import pandas as pd from scm.plams import AMSJob, Molecule, Settings, Cp2kJob from scm.plams.core.basejob import Job @@ -45,6 +46,12 @@ from .dissociate_xyn import dissociate_ligand from ..qd_opt_ff import qd_opt_ff +try: + import h5py + _LABEL_DTYPE = h5py.string_dtype(encoding='ascii') +except ImportError: + _LABEL_DTYPE = np.dtype(object) + __all__ = ['init_bde'] @@ -55,10 +62,17 @@ def init_bde(qd_df: SettingsDataFrame) -> None: # Create columns columns = _construct_columns(workflow, qd_df[MOL]) - import_columns = {(i, j): (np.nan if i != 'label' else None) for i, j in columns} + columns.names = qd_df.columns.names + for i, j in columns: + if i == 'label': + qd_df[i, j] = np.array('', dtype=_LABEL_DTYPE).take(0) + else: + qd_df[i, j] = 0.0 # Pull from the database; push unoptimized structures - idx = workflow.from_db(qd_df, columns=import_columns) + df_bool = workflow.from_db(qd_df, columns=columns.levels[0]) + + idx = df_bool[columns].all() workflow(start_bde, qd_df, columns=columns, index=idx, workflow=workflow) # Convert the datatype from object back to float @@ -72,11 +86,10 @@ def init_bde(qd_df: SettingsDataFrame) -> None: qd_df[JOB_SETTINGS_BDE] = workflow.pop_job_settings(qd_df[MOL]) # Push the optimized structures to the database - job_recipe = workflow.get_recipe() - workflow.to_db(qd_df, index=idx, columns=columns, job_recipe=job_recipe) + workflow.to_db(qd_df, df_bool, columns=columns) -def _construct_columns(workflow: WorkFlow, mol_list: Iterable[Molecule]) -> List[Tuple[str, str]]: +def _construct_columns(workflow: WorkFlow, mol_list: Iterable[Molecule]) -> pd.MultiIndex: """Construct BDE columns for :func:`init_bde`.""" if workflow.core_index: stop = len(workflow.core_index) @@ -94,8 +107,8 @@ def _construct_columns(workflow: WorkFlow, mol_list: Iterable[Molecule]) -> List if workflow.jobs[1]: # i.e. thermochemical corrections are enabled super_keys += ('BDE ddG', 'BDE dG') - sub_keys = np.arange(stop).astype(dtype=str) - return list(product(super_keys, sub_keys)) + sub_keys = np.arange(stop) + return pd.MultiIndex.from_product((super_keys, sub_keys)) def start_bde(mol_list: Iterable[Molecule], diff --git a/nanoCAT/bde/dissociate_xyn.py b/nanoCAT/bde/dissociate_xyn.py index eff2b87..2f8e061 100644 --- a/nanoCAT/bde/dissociate_xyn.py +++ b/nanoCAT/bde/dissociate_xyn.py @@ -32,8 +32,8 @@ from itertools import chain, combinations from typing import ( - Union, Mapping, Iterable, Tuple, Dict, List, Optional, FrozenSet, Generator, - Any, TypeVar, SupportsInt, Set, Collection + Union, Mapping, Iterable, Tuple, Dict, List, Optional, Set, Generator, + Any, TypeVar, SupportsInt, Set, Collection, FrozenSet, cast ) import numpy as np @@ -46,7 +46,7 @@ from CAT.utils import iter_repeat from CAT.mol_utils import to_atnum from CAT.attachment.ligand_anchoring import _smiles_to_rdmol -from nanoutils import group_by_values +from nanoutils import group_by_values, as_nd_array from .guess_core_dist import guess_core_core_dist from .identify_surface import identify_surface @@ -192,7 +192,7 @@ def dissociate_ligand(mol: Molecule, def _lig_mapping(mol: Molecule, idx: Iterable[int]) -> IdxMapping: """Map **idx** to all atoms with the same residue number.""" - idx = as_array(idx, dtype=int) # 1-based indices + idx = as_nd_array(idx, dtype=int) # 1-based indices iterator = ((i, at.properties.pdb_info.get('ResidueNumber', i)) for i, at in enumerate(mol, 1)) lig_mapping = group_by_values(iterator) @@ -203,7 +203,7 @@ def _lig_mapping(mol: Molecule, idx: Iterable[int]) -> IdxMapping: def _core_mapping(mol: Molecule, idx: Iterable[int], smiles: str) -> IdxMapping: """Map **idx** to all atoms part of the same substructure (see **smiles**).""" - idx = as_array(idx, dtype=int) # 1-based indices + idx = as_nd_array(idx, dtype=int) # 1-based indices rdmol = molkit.to_rdmol(mol) rd_smiles = _smiles_to_rdmol(smiles) @@ -222,7 +222,7 @@ def _core_mapping(mol: Molecule, idx: Iterable[int], smiles: str) -> IdxMapping: class DummyGetter: """A mapping placeholder; calling `__getitem__` will return the supplied key embedded within a tuple.""" # noqa - def __getitem__(self, key: SupportsInt) -> Tuple[int]: return (key,) + def __getitem__(self, key: T) -> Tuple[T]: return (key,) _DUMMY_GETTER = DummyGetter() @@ -297,7 +297,7 @@ def core_idx(self) -> np.ndarray: return self._core_idx @core_idx.setter def core_idx(self, value: Union[int, Iterable[int]]) -> None: - self._core_idx = core_idx = as_array(value, dtype=int, ndmin=1, copy=True) + self._core_idx = core_idx = as_nd_array(value, dtype=int, ndmin=1, copy=True) core_idx -= 1 core_idx.sort() @@ -319,7 +319,7 @@ def topology(self) -> Mapping[int, str]: return self._topology def topology(self, value: Optional[Mapping[int, str]]) -> None: self._topology = value or {} - _PRIVATE_ATTR: FrozenSet[str] = frozenset({'_coords'}) + _PRIVATE_ATTR: Set[str] = frozenset({'_coords'}) # type: ignore def __init__(self, mol: Molecule, core_idx: Union[int, Iterable[int]], @@ -462,7 +462,7 @@ def get_pairs_closest(self, lig_idx: Union[int, Iterable[int]], # Extract instance variables xyz: np.ndarray = self._coords i: np.ndarray = self.core_idx - j: np.ndarray = as_array(lig_idx, dtype=int) - 1 + j: np.ndarray = as_nd_array(lig_idx, dtype=int) - 1 n: int = self.ligand_count # Find all core atoms within a radius **max_dist** from a ligand @@ -516,7 +516,7 @@ def get_pairs_distance(self, lig_idx: Union[int, Iterable[int]], # Extract instance variables xyz: np.ndarray = self._coords i: np.ndarray = self.core_idx - j: np.ndarray = as_array(lig_idx, dtype=int) - 1 + j: np.ndarray = as_nd_array(lig_idx, dtype=int) - 1 n: int = self.ligand_count # Find all core atoms within a radius **max_dist** from a ligand @@ -655,20 +655,3 @@ def _get_new_indices(self, core_is_lig: bool = False) -> List[int]: for i in ret: i -= 1 return ret - - -def as_array(iterable: Iterable, dtype: Union[None, str, type, np.dtype] = None, - copy: bool = False, ndmin: int = 0) -> np.ndarray: - """Convert a generic iterable (including iterators) into a NumPy array. - - See :func:`numpy.array` for an extensive description of all parameters. - - """ - try: - ret = np.array(iterable, dtype=dtype, copy=copy) - except TypeError: # **iterable** is an iterator - ret = np.fromiter(iterable, dtype=dtype) - - if ret.ndim < ndmin: - ret.shape += (1,) * (ndmin - ret.ndim) - return ret diff --git a/nanoCAT/cdft.py b/nanoCAT/cdft.py index c288167..4d12d4e 100644 --- a/nanoCAT/cdft.py +++ b/nanoCAT/cdft.py @@ -8,7 +8,7 @@ from qmflows.packages.SCM import ADF_Result from scm.plams import Molecule, Settings, ADFJob, ADFResults, Units, Results from scm.plams.core.basejob import Job -from CAT.workflows import WorkFlow, JOB_SETTINGS_CDFT, MOL +from CAT.workflows import WorkFlow, JOB_SETTINGS_CDFT, MOL, CDFT_CHI from CAT.jobs import job_single_point from CAT.settings_dataframe import SettingsDataFrame @@ -34,6 +34,8 @@ cdft.specific.adf = _templates.singlepoint.specific.adf.copy() cdft += Settings(yaml.safe_load(_CDFT)) +CDFT = CDFT_CHI[0] + def init_cdft(ligand_df: SettingsDataFrame) -> None: r"""Initialize the ligand conceptual dft (CDFT) workflow. @@ -45,9 +47,16 @@ def init_cdft(ligand_df: SettingsDataFrame) -> None: """ workflow = WorkFlow.from_template(ligand_df, name='cdft') + for k, v in workflow.import_columns.items(): + ligand_df[k] = v # Import from the database and start the calculation - idx = workflow.from_db(ligand_df) + df_bool = workflow.from_db(ligand_df, CDFT) + column_subset = workflow.import_columns.keys() - df_bool.columns + for k in column_subset: + df_bool[k] = True + + idx = df_bool[CDFT].any(axis=1) workflow(start_crs_jobs, ligand_df, index=idx) # Sets a nested list with the filenames of .in files @@ -55,8 +64,7 @@ def init_cdft(ligand_df: SettingsDataFrame) -> None: ligand_df[JOB_SETTINGS_CDFT] = workflow.pop_job_settings(ligand_df[MOL]) # Export to the database - job_recipe = workflow.get_recipe() - workflow.to_db(ligand_df, index=idx, job_recipe=job_recipe) + workflow.to_db(ligand_df, df_bool, columns=workflow.export_columns) def start_crs_jobs(mol_list: Iterable[Molecule], diff --git a/nanoCAT/ligand_solvation.py b/nanoCAT/ligand_solvation.py index 7c13b88..7831020 100644 --- a/nanoCAT/ligand_solvation.py +++ b/nanoCAT/ligand_solvation.py @@ -40,6 +40,7 @@ from typing import Optional, Sequence, Collection, Tuple, List, Iterable, Any, Type, Iterator import numpy as np +import pandas as pd from scm.plams import Settings, Molecule, Results, CRSJob, CRSResults, JobRunner, ADFJob from scm.plams.core.basejob import Job @@ -77,20 +78,22 @@ def init_solv(ligand_df: SettingsDataFrame, # Create column slices solvent_list = get_solvent_list(solvent_list) columns = get_solvent_columns(solvent_list) - - # Create new import and export columns - import_columns = {k: np.nan for k in columns} - import_columns.update(workflow.import_columns) - export_columns = columns + list(workflow.import_columns) + for i in columns: + ligand_df[i] = 0.0 # Create index slices and run the workflow - idx = workflow.from_db(ligand_df, columns=import_columns) + df_bool = workflow.from_db(ligand_df, *columns.levels[0].values) + column_subset = columns.difference(df_bool.columns) + for i in column_subset: + df_bool[i] = True + + idx = df_bool[columns].any(axis=1) workflow(start_crs_jobs, ligand_df, index=idx, columns=columns, solvent_list=solvent_list) # Export results back to the database - job_recipe = workflow.get_recipe() ligand_df[JOB_SETTINGS_CRS] = workflow.pop_job_settings(ligand_df[MOL]) - workflow.to_db(ligand_df, index=idx, columns=export_columns, job_recipe=job_recipe) + export_columns = columns.append(pd.Index([JOB_SETTINGS_CRS])) + workflow.to_db(ligand_df, df_bool, columns=export_columns) def start_crs_jobs(mol_list: Iterable[Molecule], @@ -116,7 +119,7 @@ def start_crs_jobs(mol_list: Iterable[Molecule], return ret -def get_solvent_columns(solvent_list: Iterable[str]) -> List[Tuple[str, str]]: +def get_solvent_columns(solvent_list: Iterable[str]) -> pd.MultiIndex: """Create a list of column names from an iterable containing .coskf names. Parameters @@ -134,7 +137,8 @@ def get_solvent_columns(solvent_list: Iterable[str]) -> List[Tuple[str, str]]: """ # Use filenames without extensions are absolute paths clm_tups = [os.path.basename(i).rsplit('.', maxsplit=1)[0] for i in solvent_list] - return list(product(('E_solv', 'gamma'), clm_tups)) + super_keys = ('E_solv', 'gamma') + return pd.MultiIndex.from_product((super_keys, clm_tups)) def get_solvent_list(solvent_list: Optional[Sequence[str]] = None) -> Sequence[str]: diff --git a/nanoCAT/mol_bulk.py b/nanoCAT/mol_bulk.py index 0021849..ad95795 100644 --- a/nanoCAT/mol_bulk.py +++ b/nanoCAT/mol_bulk.py @@ -33,7 +33,7 @@ from scm.plams import Molecule from CAT.settings_dataframe import SettingsDataFrame -from CAT.workflows import WorkFlow, MOL +from CAT.workflows import WorkFlow, MOL, V_BULK __all__ = ['init_lig_bulkiness'] @@ -70,14 +70,17 @@ def init_lig_bulkiness(qd_df: SettingsDataFrame, ligand_df: SettingsDataFrame, """ workflow = WorkFlow.from_template(qd_df, name='bulkiness') workflow.keep_files = False + qd_df[V_BULK] = 0.0 # Import from the database and start the calculation - idx = workflow.from_db(qd_df) - workflow(start_lig_bulkiness, qd_df, index=idx, + df_bool = workflow.from_db(qd_df, V_BULK[0]) + if V_BULK not in df_bool.columns: + df_bool[V_BULK] = True + workflow(start_lig_bulkiness, qd_df, index=df_bool[V_BULK], lig_series=ligand_df[MOL], core_series=core_df[MOL]) # Export to the database - workflow.to_db(qd_df, index=idx) + workflow.to_db(qd_df, df_bool, columns=workflow.export_columns) def start_lig_bulkiness(qd_series: pd.Series, lig_series: pd.Series, core_series: pd.Series, diff --git a/setup.py b/setup.py index 7becdb2..26ff034 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ 'nanoCAT.recipes' ], package_dir={'nanoCAT': 'nanoCAT'}, - package_data={'nanoCAT': ['data/*csv', 'py.typed', '*.pyi']}, + package_data={'nanoCAT': ['py.typed', '*.pyi']}, include_package_data=True, license='GNU Lesser General Public License v3 or later', zip_safe=False, @@ -80,7 +80,6 @@ 'test': ['pytest', 'pytest-cov', 'pytest-mock', - 'pycodestyle'], - 'doc': ['sphinx', 'sphinx_rtd_theme', 'sphinx-autodoc-typehints'] + 'pycodestyle'] } )