Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


##############
Nano-CAT 0.6.1
Nano-CAT 0.7.0
##############

**Nano-CAT** is a collection of tools for the analysis of nanocrystals,
Expand Down
2 changes: 1 addition & 1 deletion nanoCAT/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.6.1'
__version__ = '0.7.0'
13 changes: 10 additions & 3 deletions nanoCAT/asa/asa.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,25 @@ def init_asa(qd_df: SettingsDataFrame) -> None:

"""
workflow = WorkFlow.from_template(qd_df, name='asa')
columns = workflow.import_columns.keys()
for i in columns:
qd_df[i] = 0.0

# Run the activation strain workflow
idx = workflow.from_db(qd_df)
df_bool = workflow.from_db(qd_df, 'ASA')
columns_subset = columns - df_bool.columns
for i in columns_subset:
df_bool[i] = True

idx = df_bool['ASA'].all(axis=1)
if workflow.md:
workflow(get_asa_md, qd_df, index=idx)
else:
workflow(get_asa_energy, qd_df, index=idx)

# Prepare for results exporting
qd_df[JOB_SETTINGS_ASA] = workflow.pop_job_settings(qd_df[MOL])
job_recipe = workflow.get_recipe()
workflow.to_db(qd_df, index=idx, job_recipe=job_recipe)
workflow.to_db(qd_df, df_bool, columns=workflow.export_columns)


def get_asa_energy(mol_list: Iterable[Molecule],
Expand Down
27 changes: 20 additions & 7 deletions nanoCAT/bde/bde_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from itertools import product

import numpy as np
import pandas as pd

from scm.plams import AMSJob, Molecule, Settings, Cp2kJob
from scm.plams.core.basejob import Job
Expand All @@ -45,6 +46,12 @@
from .dissociate_xyn import dissociate_ligand
from ..qd_opt_ff import qd_opt_ff

try:
import h5py
_LABEL_DTYPE = h5py.string_dtype(encoding='ascii')
except ImportError:
_LABEL_DTYPE = np.dtype(object)

__all__ = ['init_bde']


Expand All @@ -55,10 +62,17 @@ def init_bde(qd_df: SettingsDataFrame) -> None:

# Create columns
columns = _construct_columns(workflow, qd_df[MOL])
import_columns = {(i, j): (np.nan if i != 'label' else None) for i, j in columns}
columns.names = qd_df.columns.names
for i, j in columns:
if i == 'label':
qd_df[i, j] = np.array('', dtype=_LABEL_DTYPE).take(0)
else:
qd_df[i, j] = 0.0

# Pull from the database; push unoptimized structures
idx = workflow.from_db(qd_df, columns=import_columns)
df_bool = workflow.from_db(qd_df, columns=columns.levels[0])

idx = df_bool[columns].all()
workflow(start_bde, qd_df, columns=columns, index=idx, workflow=workflow)

# Convert the datatype from object back to float
Expand All @@ -72,11 +86,10 @@ def init_bde(qd_df: SettingsDataFrame) -> None:
qd_df[JOB_SETTINGS_BDE] = workflow.pop_job_settings(qd_df[MOL])

# Push the optimized structures to the database
job_recipe = workflow.get_recipe()
workflow.to_db(qd_df, index=idx, columns=columns, job_recipe=job_recipe)
workflow.to_db(qd_df, df_bool, columns=columns)


def _construct_columns(workflow: WorkFlow, mol_list: Iterable[Molecule]) -> List[Tuple[str, str]]:
def _construct_columns(workflow: WorkFlow, mol_list: Iterable[Molecule]) -> pd.MultiIndex:
"""Construct BDE columns for :func:`init_bde`."""
if workflow.core_index:
stop = len(workflow.core_index)
Expand All @@ -94,8 +107,8 @@ def _construct_columns(workflow: WorkFlow, mol_list: Iterable[Molecule]) -> List
if workflow.jobs[1]: # i.e. thermochemical corrections are enabled
super_keys += ('BDE ddG', 'BDE dG')

sub_keys = np.arange(stop).astype(dtype=str)
return list(product(super_keys, sub_keys))
sub_keys = np.arange(stop)
return pd.MultiIndex.from_product((super_keys, sub_keys))


def start_bde(mol_list: Iterable[Molecule],
Expand Down
37 changes: 10 additions & 27 deletions nanoCAT/bde/dissociate_xyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@

from itertools import chain, combinations
from typing import (
Union, Mapping, Iterable, Tuple, Dict, List, Optional, FrozenSet, Generator,
Any, TypeVar, SupportsInt, Set, Collection
Union, Mapping, Iterable, Tuple, Dict, List, Optional, Set, Generator,
Any, TypeVar, SupportsInt, Set, Collection, FrozenSet, cast
)

import numpy as np
Expand All @@ -46,7 +46,7 @@
from CAT.utils import iter_repeat
from CAT.mol_utils import to_atnum
from CAT.attachment.ligand_anchoring import _smiles_to_rdmol
from nanoutils import group_by_values
from nanoutils import group_by_values, as_nd_array

from .guess_core_dist import guess_core_core_dist
from .identify_surface import identify_surface
Expand Down Expand Up @@ -192,7 +192,7 @@ def dissociate_ligand(mol: Molecule,

def _lig_mapping(mol: Molecule, idx: Iterable[int]) -> IdxMapping:
"""Map **idx** to all atoms with the same residue number."""
idx = as_array(idx, dtype=int) # 1-based indices
idx = as_nd_array(idx, dtype=int) # 1-based indices

iterator = ((i, at.properties.pdb_info.get('ResidueNumber', i)) for i, at in enumerate(mol, 1))
lig_mapping = group_by_values(iterator)
Expand All @@ -203,7 +203,7 @@ def _lig_mapping(mol: Molecule, idx: Iterable[int]) -> IdxMapping:

def _core_mapping(mol: Molecule, idx: Iterable[int], smiles: str) -> IdxMapping:
"""Map **idx** to all atoms part of the same substructure (see **smiles**)."""
idx = as_array(idx, dtype=int) # 1-based indices
idx = as_nd_array(idx, dtype=int) # 1-based indices

rdmol = molkit.to_rdmol(mol)
rd_smiles = _smiles_to_rdmol(smiles)
Expand All @@ -222,7 +222,7 @@ def _core_mapping(mol: Molecule, idx: Iterable[int], smiles: str) -> IdxMapping:
class DummyGetter:
    """A mapping placeholder; calling `__getitem__` will return the supplied key embedded within a tuple."""  # noqa

    def __getitem__(self, key: T) -> Tuple[T]:
        """Return **key**, unchanged, wrapped in a 1-tuple."""
        # The key is passed through as-is (no ``int`` conversion),
        # hence the generic ``T -> Tuple[T]`` signature.
        return (key,)


_DUMMY_GETTER = DummyGetter()
Expand Down Expand Up @@ -297,7 +297,7 @@ def core_idx(self) -> np.ndarray: return self._core_idx

@core_idx.setter
def core_idx(self, value: Union[int, Iterable[int]]) -> None:
self._core_idx = core_idx = as_array(value, dtype=int, ndmin=1, copy=True)
self._core_idx = core_idx = as_nd_array(value, dtype=int, ndmin=1, copy=True)
core_idx -= 1
core_idx.sort()

Expand All @@ -319,7 +319,7 @@ def topology(self) -> Mapping[int, str]: return self._topology
def topology(self, value: Optional[Mapping[int, str]]) -> None:
self._topology = value or {}

_PRIVATE_ATTR: FrozenSet[str] = frozenset({'_coords'})
_PRIVATE_ATTR: Set[str] = frozenset({'_coords'}) # type: ignore

def __init__(self, mol: Molecule,
core_idx: Union[int, Iterable[int]],
Expand Down Expand Up @@ -462,7 +462,7 @@ def get_pairs_closest(self, lig_idx: Union[int, Iterable[int]],
# Extract instance variables
xyz: np.ndarray = self._coords
i: np.ndarray = self.core_idx
j: np.ndarray = as_array(lig_idx, dtype=int) - 1
j: np.ndarray = as_nd_array(lig_idx, dtype=int) - 1
n: int = self.ligand_count

# Find all core atoms within a radius **max_dist** from a ligand
Expand Down Expand Up @@ -516,7 +516,7 @@ def get_pairs_distance(self, lig_idx: Union[int, Iterable[int]],
# Extract instance variables
xyz: np.ndarray = self._coords
i: np.ndarray = self.core_idx
j: np.ndarray = as_array(lig_idx, dtype=int) - 1
j: np.ndarray = as_nd_array(lig_idx, dtype=int) - 1
n: int = self.ligand_count

# Find all core atoms within a radius **max_dist** from a ligand
Expand Down Expand Up @@ -655,20 +655,3 @@ def _get_new_indices(self, core_is_lig: bool = False) -> List[int]:
for i in ret:
i -= 1
return ret


def as_array(iterable: Iterable, dtype: Union[None, str, type, np.dtype] = None,
             copy: bool = False, ndmin: int = 0) -> np.ndarray:
    """Convert a generic iterable (including iterators) into a NumPy array.

    See :func:`numpy.array` for an extensive description of all parameters.

    Parameters
    ----------
    iterable : :class:`Iterable`
        An array-like object, a scalar or an iterator.
        Iterators are consumed in their entirety.
    dtype : :class:`str`, :class:`type` or :class:`numpy.dtype`, optional
        The desired data type of the output array.
    copy : :class:`bool`
        If ``True``, always return a new array.
        If ``False``, only copy when this cannot be avoided.
    ndmin : :class:`int`
        The minimum number of dimensions of the output array.
        Missing dimensions are appended as trailing axes of length 1.

    Returns
    -------
    :class:`numpy.ndarray`
        The passed **iterable** as an array.

    """
    if hasattr(iterable, '__next__'):
        # NumPy cannot construct arrays from iterators directly: depending on
        # *dtype* it either raises or silently wraps the iterator in a 0D
        # object array, so materialize the iterator explicitly.
        ret = np.array(list(iterable), dtype=dtype)
    elif copy:
        ret = np.array(iterable, dtype=dtype, copy=True)
    else:
        # ``np.array(..., copy=False)`` raises under NumPy >= 2 whenever a copy
        # is required; ``np.asarray`` has "copy only if needed" semantics
        # across all NumPy versions.
        ret = np.asarray(iterable, dtype=dtype)

    if ret.ndim < ndmin:
        # Reshape into a view rather than assigning to ``ret.shape``: the
        # latter would alter the caller's array whenever no copy was made.
        ret = ret.reshape(ret.shape + (1,) * (ndmin - ret.ndim))
    return ret
16 changes: 12 additions & 4 deletions nanoCAT/cdft.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from qmflows.packages.SCM import ADF_Result
from scm.plams import Molecule, Settings, ADFJob, ADFResults, Units, Results
from scm.plams.core.basejob import Job
from CAT.workflows import WorkFlow, JOB_SETTINGS_CDFT, MOL
from CAT.workflows import WorkFlow, JOB_SETTINGS_CDFT, MOL, CDFT_CHI
from CAT.jobs import job_single_point
from CAT.settings_dataframe import SettingsDataFrame

Expand All @@ -34,6 +34,8 @@
cdft.specific.adf = _templates.singlepoint.specific.adf.copy()
cdft += Settings(yaml.safe_load(_CDFT))

CDFT = CDFT_CHI[0]


def init_cdft(ligand_df: SettingsDataFrame) -> None:
r"""Initialize the ligand conceptual dft (CDFT) workflow.
Expand All @@ -45,18 +47,24 @@ def init_cdft(ligand_df: SettingsDataFrame) -> None:

"""
workflow = WorkFlow.from_template(ligand_df, name='cdft')
for k, v in workflow.import_columns.items():
ligand_df[k] = v

# Import from the database and start the calculation
idx = workflow.from_db(ligand_df)
df_bool = workflow.from_db(ligand_df, CDFT)
column_subset = workflow.import_columns.keys() - df_bool.columns
for k in column_subset:
df_bool[k] = True

idx = df_bool[CDFT].any(axis=1)
workflow(start_crs_jobs, ligand_df, index=idx)

# Sets a nested list with the filenames of .in files
# This cannot be done with loc, as it will try to expand the list into a 2D array
ligand_df[JOB_SETTINGS_CDFT] = workflow.pop_job_settings(ligand_df[MOL])

# Export to the database
job_recipe = workflow.get_recipe()
workflow.to_db(ligand_df, index=idx, job_recipe=job_recipe)
workflow.to_db(ligand_df, df_bool, columns=workflow.export_columns)


def start_crs_jobs(mol_list: Iterable[Molecule],
Expand Down
24 changes: 14 additions & 10 deletions nanoCAT/ligand_solvation.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from typing import Optional, Sequence, Collection, Tuple, List, Iterable, Any, Type, Iterator

import numpy as np
import pandas as pd

from scm.plams import Settings, Molecule, Results, CRSJob, CRSResults, JobRunner, ADFJob
from scm.plams.core.basejob import Job
Expand Down Expand Up @@ -77,20 +78,22 @@ def init_solv(ligand_df: SettingsDataFrame,
# Create column slices
solvent_list = get_solvent_list(solvent_list)
columns = get_solvent_columns(solvent_list)

# Create new import and export columns
import_columns = {k: np.nan for k in columns}
import_columns.update(workflow.import_columns)
export_columns = columns + list(workflow.import_columns)
for i in columns:
ligand_df[i] = 0.0

# Create index slices and run the workflow
idx = workflow.from_db(ligand_df, columns=import_columns)
df_bool = workflow.from_db(ligand_df, *columns.levels[0].values)
column_subset = columns.difference(df_bool.columns)
for i in column_subset:
df_bool[i] = True

idx = df_bool[columns].any(axis=1)
workflow(start_crs_jobs, ligand_df, index=idx, columns=columns, solvent_list=solvent_list)

# Export results back to the database
job_recipe = workflow.get_recipe()
ligand_df[JOB_SETTINGS_CRS] = workflow.pop_job_settings(ligand_df[MOL])
workflow.to_db(ligand_df, index=idx, columns=export_columns, job_recipe=job_recipe)
export_columns = columns.append(pd.Index([JOB_SETTINGS_CRS]))
workflow.to_db(ligand_df, df_bool, columns=export_columns)


def start_crs_jobs(mol_list: Iterable[Molecule],
Expand All @@ -116,7 +119,7 @@ def start_crs_jobs(mol_list: Iterable[Molecule],
return ret


def get_solvent_columns(solvent_list: Iterable[str]) -> List[Tuple[str, str]]:
def get_solvent_columns(solvent_list: Iterable[str]) -> pd.MultiIndex:
"""Create a list of column names from an iterable containing .coskf names.

Parameters
Expand All @@ -134,7 +137,8 @@ def get_solvent_columns(solvent_list: Iterable[str]) -> List[Tuple[str, str]]:
"""
# Use bare filenames, stripped of their directory paths and extensions
clm_tups = [os.path.basename(i).rsplit('.', maxsplit=1)[0] for i in solvent_list]
return list(product(('E_solv', 'gamma'), clm_tups))
super_keys = ('E_solv', 'gamma')
return pd.MultiIndex.from_product((super_keys, clm_tups))


def get_solvent_list(solvent_list: Optional[Sequence[str]] = None) -> Sequence[str]:
Expand Down
11 changes: 7 additions & 4 deletions nanoCAT/mol_bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from scm.plams import Molecule

from CAT.settings_dataframe import SettingsDataFrame
from CAT.workflows import WorkFlow, MOL
from CAT.workflows import WorkFlow, MOL, V_BULK

__all__ = ['init_lig_bulkiness']

Expand Down Expand Up @@ -70,14 +70,17 @@ def init_lig_bulkiness(qd_df: SettingsDataFrame, ligand_df: SettingsDataFrame,
"""
workflow = WorkFlow.from_template(qd_df, name='bulkiness')
workflow.keep_files = False
qd_df[V_BULK] = 0.0

# Import from the database and start the calculation
idx = workflow.from_db(qd_df)
workflow(start_lig_bulkiness, qd_df, index=idx,
df_bool = workflow.from_db(qd_df, V_BULK[0])
if V_BULK not in df_bool.columns:
df_bool[V_BULK] = True
workflow(start_lig_bulkiness, qd_df, index=df_bool[V_BULK],
lig_series=ligand_df[MOL], core_series=core_df[MOL])

# Export to the database
workflow.to_db(qd_df, index=idx)
workflow.to_db(qd_df, df_bool, columns=workflow.export_columns)


def start_lig_bulkiness(qd_series: pd.Series, lig_series: pd.Series, core_series: pd.Series,
Expand Down
5 changes: 2 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
'nanoCAT.recipes'
],
package_dir={'nanoCAT': 'nanoCAT'},
package_data={'nanoCAT': ['data/*csv', 'py.typed', '*.pyi']},
package_data={'nanoCAT': ['py.typed', '*.pyi']},
include_package_data=True,
license='GNU Lesser General Public License v3 or later',
zip_safe=False,
Expand Down Expand Up @@ -80,7 +80,6 @@
'test': ['pytest',
'pytest-cov',
'pytest-mock',
'pycodestyle'],
'doc': ['sphinx', 'sphinx_rtd_theme', 'sphinx-autodoc-typehints']
'pycodestyle']
}
)