# Source code for aim2dat.ml.transformers

"""Scikit learn Transformer classes extracting features from crystals or molecules."""

# Standard library imports
import itertools
import math
from abc import ABC, abstractmethod

# Third party library imports
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted

# Internal library imports
from aim2dat.strct import StructureCollection, StructureOperations
from aim2dat.strct.ext_analysis import (
    calculate_warren_cowley_order_p,
    calculate_prdf,
    calculate_interaction_matrix,
    calculate_acsf_descriptor,
    calculate_soap_descriptor,
    calculate_mbtr_descriptor,
)
from aim2dat.strct.strct import _compare_function_args
from aim2dat.utils.chem_formula import transform_list_to_dict
from aim2dat.ml.utils import _get_all_elements


class _BaseStructureTransformer(BaseEstimator, TransformerMixin, ABC):
    """Base class for all structure transformers using the StructureCollection object."""

    # Input parameters that influence the expensive structural analysis; they serve
    # as the cache key for precomputed properties.
    _supported_params = []

    def __init__(self, n_procs, chunksize, verbose):
        super().__init__()
        self.n_procs = n_procs
        self.chunksize = chunksize
        self.verbose = verbose

        # List of (parameters, StructureOperations) tuples holding precomputed
        # structural properties.
        self._precomp_properties = []

    def __sklearn_clone__(self):
        # Carry precomputed properties over to the clone so that e.g. a grid search
        # can reuse expensive structural analyses.
        new_obj = super().__sklearn_clone__()
        for precomp_prop in self.precomputed_properties:
            new_obj.add_precomputed_properties(*precomp_prop)
        return new_obj

    @property
    def precomputed_properties(self):
        """
        list: Precomputed properties given as list of tuples consisting of input parameters and
        StructureOperations object.
        """
        return tuple(self._precomp_properties)

    def clear_precomputed_properties(self):
        """
        Clear all precomputed properties.
        """
        self._precomp_properties = []

    def get_feature_names_out(self, input_features=None):
        """
        Get feature names.
        """
        check_is_fitted(self, "_features_out")
        return self._features_out

    def fit(self, X, y=None):
        """
        Fit function that determines the number of features.

        Parameters
        ----------
        X : list or aim2dat.strct.StructureCollection
            List of structures or StructureCollection.
        y : list (optional)
            list of target property.

        Returns
        -------
        self
            Transformer object.
        """
        self._fit(X, y)
        return self

    def transform(self, X):
        """
        Transform structures to features.

        Parameters
        ----------
        X : list or aim2dat.strct.StructureCollection
            List of structures or StructureCollection.

        Returns
        -------
        numpy.array
            Nested array of features.
        """
        # Reuse a precomputed StructureOperations object if one was stored for the
        # current set of input parameters.
        input_p = {param: getattr(self, param) for param in self._supported_params}
        strct_op = None
        for input_p0, strct_op0 in self._precomp_properties:
            if _compare_function_args(input_p, input_p0):
                strct_op = strct_op0
                break
        label_list, strct_op = self._create_strct_op(X, strct_op)
        features = self._get_features(label_list, strct_op, input_p)
        return features

    def precompute_parameter_space(self, param_grid, X):
        """
        Precompute and store structural properties to be reused later e.g. for a grid search.

        Parameters
        ----------
        param_grid : list or dict
            Dictionary or list of dictionaries of input parameters.
        X : list or aim2dat.strct.StructureCollection
            List of structures or StructureCollection.
        """
        if len(self._supported_params) == 0:
            # Return None if no expensive structural analysis is needed.
            return None

        label_list, strct_op = self._create_strct_op(X)
        if isinstance(param_grid, dict):
            param_grid = [param_grid]
        for param_grid0 in param_grid:
            # Complete the grid with the transformer's current values for parameters
            # that are not part of the grid.
            complete_grid = []
            for param in self._supported_params:
                if param in param_grid0:
                    complete_grid.append(param_grid0[param])
                else:
                    complete_grid.append([getattr(self, param)])
            for comb in itertools.product(*complete_grid):
                input_p = {p_key: p_val for p_key, p_val in zip(self._supported_params, comb)}
                strct_op_copy = strct_op.copy()
                self._get_strct_op_properties(label_list, strct_op_copy, input_p)
                self.add_precomputed_properties(input_p, strct_op_copy)

    def add_precomputed_properties(self, parameters, structure_operations):
        """
        Add precomputed properties.

        Parameters
        ----------
        parameters : dict
            Dictionary of input parameters.
        structure_operations : StructureOperations
            StructureOperations object storing the properties according to the input parameters.
        """
        parameters = self._complete_parameters(parameters)
        is_added = False
        for idx, (stored_param, _) in enumerate(self._precomp_properties):
            if parameters == stored_param:
                # Replace an existing entry with identical parameters.
                self._precomp_properties[idx] = (parameters, structure_operations)
                is_added = True
                break
        if not is_added:
            self._precomp_properties.append((parameters, structure_operations))

    @abstractmethod
    def _fit(self, X, y):
        pass

    def _get_strct_op_properties(self, label_list, strct_op, input_p):
        # Default: no expensive structural analysis is needed. The signature matches
        # the call sites in ``transform`` and ``precompute_parameter_space`` as well
        # as all subclass overrides (the original two-argument signature did not).
        pass

    @abstractmethod
    def _get_features(self, label_list, strct_op, input_p):
        pass

    def _complete_parameters(self, parameters):
        # Fill missing supported parameters with the transformer's current values.
        comp_params = {}
        for param in self._supported_params:
            value = getattr(self, param)
            if param in parameters:
                value = parameters[param]
            comp_params[param] = value
        return comp_params

    def _create_strct_op(self, X, strct_op=None):
        if strct_op is None:
            strct_op = StructureOperations(structures=StructureCollection())
        strct_op.n_procs = self.n_procs
        strct_op.chunksize = self.chunksize
        strct_op.verbose = self.verbose
        label_list = []
        for strct in X:
            label_list.append(strct["label"])
            if strct["label"] not in strct_op.structures.labels:
                if isinstance(strct, dict):
                    strct_op.structures.append(**strct)
                else:
                    strct_op.structures.append_structure(strct.copy())
        return label_list, strct_op


class _BaseDscribeTransformer(_BaseStructureTransformer):
    """Intermediate base class for transformers wrapping dscribe descriptors."""

    def _fit(self, X, y):
        # Use the user-supplied element list if given, otherwise collect all
        # elements from the training structures.
        if self.elements is None:
            self.elements_ = _get_all_elements(X, False)
        else:
            self.elements_ = self.elements

    def _get_features(self, label_list, strct_op, input_p):
        check_is_fitted(self, attributes="elements_")
        input_p["elements"] = self.elements_
        descriptors = self._get_strct_op_properties(label_list, strct_op, input_p)
        # Stack the per-structure descriptors in the order of ``label_list``.
        return np.array([descriptors[label] for label in label_list])


class StructureCompositionTransformer(_BaseStructureTransformer):
    """
    Extract fractional concentrations of elements or kinds.

    Attributes
    ----------
    distinguish_kinds : bool (optional)
        Whether to use kinds instead of elements.
    n_procs : int (optional)
        Number of parallel processes.
    chunksize : int (optional)
        Number of structures handed to each process at once.
    verbose : bool (optional)
        Whether to print a progress bar.
    """

    def __init__(
        self,
        distinguish_kinds=False,
        n_procs=1,
        chunksize=50,
        verbose=True,
    ):
        """Initialize object."""
        super().__init__(n_procs, chunksize, verbose)
        self.distinguish_kinds = distinguish_kinds

    def _fit(self, X, y):
        self.elements_ = _get_all_elements(X, self.distinguish_kinds)
        self._features_out = np.asarray(["c_" + el for el in self.elements_], dtype=object)

    def _get_features(self, label_list, strct_op, _):
        check_is_fitted(self, attributes="elements_")
        features = np.zeros((len(label_list), len(self.elements_)))
        for row, label in enumerate(label_list):
            structure = strct_op.structures[label]
            el_type = "kinds" if self.distinguish_kinds else "elements"
            chem_f = transform_list_to_dict(structure[el_type])
            n_atoms = sum(chem_f.values())
            # Fractional concentration of each fitted element; absent elements stay 0.
            for col, el in enumerate(self.elements_):
                if el in chem_f:
                    features[row, col] = chem_f[el] / n_atoms
        return features
class StructureDensityTransformer(_BaseStructureTransformer):
    """
    Extract density of each element or kind.

    Attributes
    ----------
    distinguish_kinds : bool (optional)
        Whether to use kinds instead of elements.
    n_procs : int (optional)
        Number of parallel processes.
    chunksize : int (optional)
        Number of structures handed to each process at once.
    verbose : bool (optional)
        Whether to print a progress bar.
    """

    def __init__(
        self,
        distinguish_kinds=False,
        n_procs=1,
        chunksize=50,
        verbose=True,
    ):
        """Initialize object."""
        super().__init__(n_procs, chunksize, verbose)
        self.distinguish_kinds = distinguish_kinds

    def _fit(self, X, y):
        self.elements_ = _get_all_elements(X, self.distinguish_kinds)
        self._features_out = np.asarray(["density_" + el for el in self.elements_], dtype=object)

    def _get_features(self, label_list, strct_op, _):
        check_is_fitted(self, attributes="elements_")
        features = np.zeros((len(label_list), len(self.elements_)))
        for idx, label in enumerate(label_list):
            structure = strct_op.structures[label]
            if "cell_volume" not in structure:
                # Fix: the message has to name the key that is actually queried
                # ('cell_volume'), not 'volume'.
                raise ValueError(f"'cell_volume' not available for structure '{label}'.")
            el_type = "elements"
            if self.distinguish_kinds:
                el_type = "kinds"
            chem_f = transform_list_to_dict(structure[el_type])
            # Number density of each fitted element; absent elements stay 0.
            for el_idx, el in enumerate(self.elements_):
                if el in chem_f:
                    features[idx][el_idx] = chem_f[el] / structure["cell_volume"]
        return features
class StructureCoordinationTransformer(_BaseStructureTransformer):
    """
    Extract coordination numbers and distances between elements or kinds.

    Attributes
    ----------
    r_max : float (optional)
        Cut-off value for the maximum distance between two atoms in angstrom.
    method : str (optional)
        Method used to calculate the coordination environment. The default value is
        ``'minimum_distance'``.
    min_dist_delta : float (optional)
        Tolerance parameter that defines the relative distance from the nearest neighbour atom
        for the ``'minimum_distance'`` method.
    n_nearest_neighbours : int (optional)
        Number of neighbours that are considered coordinated for the ``'n_neighbours'`` method.
    econ_tolerance : float (optional)
        Tolerance parameter for the econ method.
    econ_conv_threshold : float (optional)
        Convergence threshold for the econ method.
    voronoi_weight_type : str (optional)
        Weight type of the Voronoi facets used for the ``'voronoi'`` method.
    voronoi_weight_threshold : float (optional)
        Weight threshold to distinguish indirect and direct neighbour atoms for the
        ``'voronoi'`` method.
    feature_types : tuple or str (optional)
        Tuple of features that are extracted. Supported options are: ``'nrs_avg'``,
        ``'nrs_stdev'``, ``'nrs_max'``, ``'nrs_min'``, ``'distance_avg'``,
        ``'distance_stdev'``, ``'distance_max'`` and ``'distance_min'``.
    n_procs : int (optional)
        Number of parallel processes.
    chunksize : int (optional)
        Number of structures handed to each process at once.
    verbose : bool (optional)
        Whether to print a progress bar.
    """

    # TODO include kinds support
    _supported_features = (
        "nrs_avg",
        "nrs_stdev",
        "nrs_max",
        "nrs_min",
        "distance_avg",
        "distance_stdev",
        "distance_max",
        "distance_min",
        "weight_avg",
        "weight_stdev",
        "weight_max",
        "weight_min",
    )
    _supported_params = (
        "r_max",
        "method",
        "min_dist_delta",
        "n_nearest_neighbours",
        "econ_tolerance",
        "econ_conv_threshold",
        "voronoi_weight_type",
        "voronoi_weight_threshold",
    )

    def __init__(
        self,
        r_max=15.0,
        method="minimum_distance",
        min_dist_delta=0.1,
        n_nearest_neighbours=5,
        econ_tolerance=0.5,
        econ_conv_threshold=0.001,
        voronoi_weight_type="rel_solid_angle",
        voronoi_weight_threshold=0.5,
        feature_types=(
            "nrs_avg",
            "nrs_stdev",
            "nrs_max",
            "nrs_min",
            "distance_avg",
            "distance_stdev",
            "distance_max",
            "distance_min",
        ),
        n_procs=1,
        chunksize=50,
        verbose=True,
    ):
        """Initialize object."""
        super().__init__(n_procs, chunksize, verbose)
        self.r_max = r_max
        self.method = method
        self.min_dist_delta = min_dist_delta
        self.n_nearest_neighbours = n_nearest_neighbours
        self.econ_tolerance = econ_tolerance
        self.econ_conv_threshold = econ_conv_threshold
        self.voronoi_weight_type = voronoi_weight_type
        self.voronoi_weight_threshold = voronoi_weight_threshold
        self.feature_types = feature_types

    @property
    def feature_types(self):
        """
        tuple or str : Feature types that are included.
        """
        return self._feature_types if hasattr(self, "_feature_types") else None

    @feature_types.setter
    def feature_types(self, value):
        if isinstance(value, str):
            value = (value,)
        elif isinstance(value, tuple):
            pass
        else:
            # Fix: the message names the actual parameter (`feature_types`), not
            # the non-existent `features`.
            raise TypeError("`feature_types` need to be a tuple of strings or a string.")
        for val in value:
            if val not in self._supported_features:
                raise ValueError(f"Feature '{val}' is not supported.")
        self._feature_types = value

    def _fit(self, X, y=None):
        all_elements = sorted(_get_all_elements(X, False))
        self.el_pairs_ = list(itertools.product(all_elements, repeat=2))
        f_labels = []
        for feature in self.feature_types:
            f_labels += [feature + "_" + "-".join(el_pair) for el_pair in self.el_pairs_]
        self._features_out = np.asarray(f_labels, dtype=object)

    def _get_strct_op_properties(self, label_list, strct_op, input_p):
        return strct_op[label_list].calculate_coordination(**input_p)

    def _get_features(self, label_list, strct_op, input_p):
        check_is_fitted(self, attributes="el_pairs_")
        features = np.zeros((len(label_list), len(self.feature_types) * len(self.el_pairs_)))
        all_coord_nrs = self._get_strct_op_properties(label_list, strct_op, input_p)
        for idx, label in enumerate(label_list):
            coord_nrs = all_coord_nrs[label]
            # Features are grouped by feature type; within each group the order of
            # ``el_pairs_`` is used. Missing element pairs stay 0.
            for el_pair_idx, el_pair in enumerate(self.el_pairs_):
                for f_idx, f_label in enumerate(self.feature_types):
                    if el_pair in coord_nrs[f_label]:
                        features[idx][f_idx * len(self.el_pairs_) + el_pair_idx] = coord_nrs[
                            f_label
                        ][el_pair]
        return features
class StructureChemOrderTransformer(_BaseStructureTransformer):
    # TODO extend to other coordination methods --> introduce weight as parameter.
    """
    Extract Warren Cowley like order parameters for each element as defined in
    :doi:`10.1103/PhysRevB.96.024104`.

    Attributes
    ----------
    r_max : float (optional)
        Cut-off value for the maximum distance between two atoms in angstrom.
    max_shells : int (optional)
        Number of neighbour shells that are evaluated.
    n_procs : int (optional)
        Number of parallel processes.
    chunksize : int (optional)
        Number of structures handed to each process at once.
    verbose : bool (optional)
        Whether to print a progress bar.
    """

    _supported_params = ("r_max", "max_shells")

    def __init__(
        self,
        r_max=15.0,
        max_shells=3,
        n_procs=1,
        chunksize=50,
        verbose=True,
    ):
        """Initialize object."""
        super().__init__(n_procs, chunksize, verbose)
        self.r_max = r_max
        self.max_shells = max_shells

    def _fit(self, X, y=None):
        self.elements_ = sorted(_get_all_elements(X, False))
        # One feature per element and neighbour shell, e.g. "Fe_1", "Fe_2", ...
        f_labels = [
            f"{el}_{shell + 1}" for el in self.elements_ for shell in range(self.max_shells)
        ]
        self._features_out = np.asarray(f_labels, dtype=object)

    def _get_strct_op_properties(self, label_list, strct_op, input_p):
        return strct_op[label_list].perform_analysis(
            method=calculate_warren_cowley_order_p, kwargs=input_p
        )

    def _get_features(self, label_list, strct_op, input_p):
        check_is_fitted(self, attributes="elements_")
        n_shells = self.max_shells
        features = np.zeros((len(label_list), len(self.elements_) * n_shells))
        all_order_p = self._get_strct_op_properties(label_list, strct_op, input_p)
        for row, label in enumerate(label_list):
            order_p = all_order_p[label]["order_p"]
            for el_idx, el in enumerate(self.elements_):
                if el not in order_p:
                    continue
                for shell_idx in range(n_shells):
                    features[row, el_idx * n_shells + shell_idx] = order_p[el][shell_idx]
        return features
class StructureFFPrintTransformer(_BaseStructureTransformer):
    """
    Extract the F-fingerprint for each element-pair as defined in
    :doi:`10.1103/PhysRevB.96.024104`.

    Attributes
    ----------
    r_max : float (optional)
        Cut-off value for the maximum distance between two atoms in angstrom.
    delta_bin : float (optional)
        Bin size to descritize the function in angstrom.
    sigma : float (optional)
        Smearing parameter for the Gaussian function.
    distinguish_kinds: bool (optional)
        Whether different kinds should be distinguished e.g. Ni0 and Ni1 would be considered as
        different elements if ``True``.
    add_header : bool
        Add leading entries that describe the weights and composition for the ffprint kernels.
    use_weights : bool
        Whether the ffprint kernels should use weights; encoded in the header as the sign of
        the number-of-elements entry.
    n_procs : int (optional)
        Number of parallel processes.
    chunksize : int (optional)
        Number of structures handed to each process at once.
    verbose : bool (optional)
        Whether to print a progress bar.
    """

    _supported_params = ("r_max", "delta_bin", "sigma", "distinguish_kinds")

    def __init__(
        self,
        r_max=15.0,
        delta_bin=0.005,
        sigma=10.0,
        distinguish_kinds=False,
        add_header=False,
        use_weights=False,
        n_procs=1,
        chunksize=50,
        verbose=True,
    ):
        """Initialize object."""
        super().__init__(n_procs, chunksize, verbose)
        self.r_max = r_max
        self.delta_bin = delta_bin
        self.sigma = sigma
        self.distinguish_kinds = distinguish_kinds
        self.add_header = add_header
        self.use_weights = use_weights

    def _fit(self, X, y=None):
        self.elements_ = _get_all_elements(X, self.distinguish_kinds)
        self.el_pairs_ = list(itertools.combinations_with_replacement(self.elements_, 2))

    def _get_strct_op_properties(self, label_list, strct_op, input_p):
        return strct_op[label_list].calculate_ffingerprint(**input_p)

    def _get_features(self, label_list, strct_op, input_p):
        check_is_fitted(self, attributes="el_pairs_")
        len_ffprint = math.ceil(self.r_max / self.delta_bin)
        header_size = 0
        if self.add_header:
            header_size = len(self.elements_) + 3
        features = np.zeros((len(label_list), header_size + len(self.el_pairs_) * len_ffprint))
        all_fprints = self._get_strct_op_properties(label_list, strct_op, input_p)
        for idx, label in enumerate(label_list):
            fprints = all_fprints[label]
            # Fix: only write the header entries when they are part of the feature
            # vector. Previously they were written unconditionally, so with
            # ``add_header=False`` they landed inside the fingerprint section
            # (``header_size == 0``) and could leave stale values for element pairs
            # absent from the fingerprints.
            if self.add_header:
                features[idx][0] = header_size
                features[idx][1] = len_ffprint
                features[idx][2] = len(self.elements_)
                if not self.use_weights:
                    features[idx][2] *= -1
                el_dict = (
                    transform_list_to_dict(strct_op.structures[label]["kinds"])
                    if self.distinguish_kinds
                    else transform_list_to_dict(strct_op.structures[label]["elements"])
                )
                for el_idx, el in enumerate(self.elements_):
                    features[idx][3 + el_idx] = el_dict.get(el, 0.0)
            for el_pair, fprint in fprints[0]["fingerprints"].items():
                if el_pair in self.el_pairs_:
                    start_idx = header_size + self.el_pairs_.index(el_pair) * len_ffprint
                    end_idx = start_idx + len_ffprint
                    features[idx][start_idx:end_idx] = fprint
        return features
class StructurePRDFTransformer(_BaseStructureTransformer):
    """
    Extract the partial radial distribution function for each element-pair as defined in
    :doi:`10.1103/PhysRevB.89.205118`.

    Attributes
    ----------
    r_max : float (optional)
        Cut-off value for the maximum distance between two atoms in angstrom.
    delta_bin : float (optional)
        Bin size to descritize the function in angstrom.
    distinguish_kinds: bool (optional)
        Whether different kinds should be distinguished e.g. Ni0 and Ni1 would be considered as
        different elements if ``True``.
    n_procs : int (optional)
        Number of parallel processes.
    chunksize : int (optional)
        Number of structures handed to each process at once.
    verbose : bool (optional)
        Whether to print a progress bar.
    """

    _supported_params = ("r_max", "delta_bin", "distinguish_kinds")

    def __init__(
        self,
        r_max=15.0,
        delta_bin=0.005,
        distinguish_kinds=False,
        n_procs=1,
        chunksize=50,
        verbose=True,
    ):
        """Initialize object."""
        super().__init__(n_procs, chunksize, verbose)
        self.r_max = r_max
        self.delta_bin = delta_bin
        self.distinguish_kinds = distinguish_kinds

    def _fit(self, X, y=None):
        all_elements = _get_all_elements(X, self.distinguish_kinds)
        self.el_pairs_ = list(itertools.product(all_elements, repeat=2))

    def _get_strct_op_properties(self, label_list, strct_op, input_p):
        return strct_op[label_list].perform_analysis(method=calculate_prdf, kwargs=input_p)

    def _get_features(self, label_list, strct_op, input_p):
        check_is_fitted(self, attributes="el_pairs_")
        n_bins = math.ceil(self.r_max / self.delta_bin)
        features = np.zeros((len(label_list), len(self.el_pairs_) * n_bins))
        all_prdfs = self._get_strct_op_properties(label_list, strct_op, input_p)
        for row, label in enumerate(label_list):
            # Each element pair occupies a contiguous slice of ``n_bins`` entries.
            for el_pair, prdf in all_prdfs[label][0].items():
                if el_pair in self.el_pairs_:
                    start = self.el_pairs_.index(el_pair) * n_bins
                    features[row, start : start + n_bins] = prdf
        return features
class StructureMatrixTransformer(_BaseStructureTransformer):
    """
    Extract features based on interaction matrices as defined in :doi:`10.1002/qua.24917`.
    This transformer class is based on the implementations of the dscribe python package.

    Attributes
    ----------
    matrix_type : str
        Matrix type. Supported options are ``'coulomb'``, ``'ewald_sum'`` or ``'sine'``.
    n_atoms_max : int or None
        Maximum number of atoms; if ``None`` it is derived from the largest structure
        during ``fit``.
    enforce_real : bool
        Passed on to the underlying dscribe matrix descriptor.
    permutation : str
        Defines the output format. Options are: ``'none'``, ``'sorted_l2'``,
        ``'eigenspectrum'`` or ``'random'``.
    sigma : float
        Standard deviation of the Gaussian distributed noise when using ``'random'`` for
        ``permutation``.
    seed : int
        Seed for the random numbers in case ``'random'`` is chosen for the ``permutation``
        attribute.
    sparse : bool
        Whether to return a sparse matrix or a dense 1D array.
    ewald_accuracy : float
        Accuracy threshold for the Ewald sum.
    ewald_w : int
        Weight parameter.
    ewald_r_cut : float or None
        Real space cutoff parameter.
    ewald_g_cut : float or None
        Reciprocal space cutoff parameter.
    ewald_a : float or None
        Parameter controlling the width of the Gaussian functions.
    dscribe_n_jobs : int
        Number of jobs used by dscribe to calculate the interaction matrix.
    dscribe_only_physical_cores : bool
        Whether to only use physical cores.
    n_procs : int (optional)
        Number of parallel processes.
    chunksize : int (optional)
        Number of structures handed to each process at once.
    verbose : bool (optional)
        Whether to print a progress bar.
    """

    _supported_params = (
        "matrix_type",
        "n_atoms_max",
        "enforce_real",
        "permutation",
        "sigma",
        "seed",
        "sparse",
        "ewald_accuracy",
        "ewald_w",
        "ewald_r_cut",
        "ewald_g_cut",
        "ewald_a",
        "dscribe_n_jobs",
        "dscribe_only_physical_cores",
    )

    def __init__(
        self,
        matrix_type="coulomb",
        n_atoms_max=None,
        enforce_real=False,
        permutation="eigenspectrum",
        sigma=None,
        seed=None,
        sparse=False,
        ewald_accuracy=1.0e-5,
        ewald_w=1,
        ewald_r_cut=None,
        ewald_g_cut=None,
        ewald_a=None,
        dscribe_n_jobs=1,
        dscribe_only_physical_cores=False,
        n_procs=1,
        chunksize=50,
        verbose=True,
    ):
        """Initialize object."""
        super().__init__(n_procs, chunksize, verbose)
        self.matrix_type = matrix_type
        self.n_atoms_max = n_atoms_max
        self.enforce_real = enforce_real
        self.permutation = permutation
        self.sigma = sigma
        self.seed = seed
        self.sparse = sparse
        self.ewald_accuracy = ewald_accuracy
        self.ewald_w = ewald_w
        self.ewald_r_cut = ewald_r_cut
        self.ewald_g_cut = ewald_g_cut
        self.ewald_a = ewald_a
        self.dscribe_n_jobs = dscribe_n_jobs
        self.dscribe_only_physical_cores = dscribe_only_physical_cores

    def _fit(self, X, y):
        # Derive the matrix size from the largest training structure unless fixed
        # explicitly by the user.
        if self.n_atoms_max is None:
            self.n_atoms_max_ = max(len(strct["elements"]) for strct in X)
        else:
            self.n_atoms_max_ = self.n_atoms_max

    def _get_strct_op_properties(self, label_list, strct_op, input_p):
        return strct_op[label_list].perform_analysis(
            method=calculate_interaction_matrix, kwargs=input_p
        )

    def _get_features(self, label_list, strct_op, input_p):
        check_is_fitted(self, attributes="n_atoms_max_")
        input_p["n_atoms_max"] = self.n_atoms_max_
        matrices = self._get_strct_op_properties(label_list, strct_op, input_p)
        return np.array([matrices[label] for label in label_list])
class StructureACSFTransformer(_BaseDscribeTransformer):
    """
    Extract ACSF descriptor as defined in :doi:`10.1063/1.3553717`.
    This transformer class is based on the implementations of the dscribe python package.

    Attributes
    ----------
    r_cut : float
        Cutoff value.
    g2_params : np.array
        List of pairs of eta and R_s values for the G^2 functions.
    g3_params : np.array
        List of kappa values for the G^3 functions.
    g4_params : np.array
        List of triplets of eta, zeta and lambda values for G^4 functions.
    g5_params : np.array
        List of triplets of eta, zeta and lambda values for G^5 functions.
    elements : list
        List of atomic numbers or chemical symbols.
    periodic : bool
        Whether to consider periodic boundary conditions.
    sparse : bool
        Whether to return a sparse matrix or a dense array.
    dscribe_n_jobs : int
        Number of jobs used by dscribe to calculate the interaction matrix.
    dscribe_only_physical_cores : bool
        Whether to only use physical cores.
    n_procs : int (optional)
        Number of parallel processes.
    chunksize : int (optional)
        Number of structures handed to each process at once.
    verbose : bool (optional)
        Whether to print a progress bar.
    """

    _supported_params = (
        "r_cut",
        "g2_params",
        "g3_params",
        "g4_params",
        "g5_params",
        "elements",
        "periodic",
        "sparse",
        "dscribe_n_jobs",
        "dscribe_only_physical_cores",
    )

    def __init__(
        self,
        r_cut=7.5,
        g2_params=None,
        g3_params=None,
        g4_params=None,
        g5_params=None,
        elements=None,
        periodic=False,
        sparse=False,
        dscribe_n_jobs=1,
        dscribe_only_physical_cores=False,
        n_procs=1,
        chunksize=50,
        verbose=True,
    ):
        """Initialize object."""
        # _BaseDscribeTransformer defines no __init__, so this resolves to
        # _BaseStructureTransformer.__init__.
        super().__init__(n_procs, chunksize, verbose)
        self.r_cut = r_cut
        self.g2_params = g2_params
        self.g3_params = g3_params
        self.g4_params = g4_params
        self.g5_params = g5_params
        self.elements = elements
        self.periodic = periodic
        self.sparse = sparse
        self.dscribe_n_jobs = dscribe_n_jobs
        self.dscribe_only_physical_cores = dscribe_only_physical_cores

    def _get_strct_op_properties(self, label_list, strct_op, input_p):
        return strct_op[label_list].perform_analysis(
            method=calculate_acsf_descriptor, kwargs=input_p
        )
class StructureSOAPTransformer(_BaseDscribeTransformer):
    """
    Extract SOAP descriptor as defined in :doi:`10.1103/PhysRevB.87.184115`.
    This transformer class is based on the implementations of the dscribe python package.

    Attributes
    ----------
    r_cut : float
        Cutoff value.
    n_max : int
        The number of radial basis functions.
    l_max : int
        The maximum degree of spherical harmonics.
    sigma : float
        The standard deviation of the gaussians.
    rbf : str
        The radial basis functions to use. Supported options are: ``'gto'`` or
        ``'polynomial'``.
    weighting : dict
        Contains the options which control the weighting of the atomic density.
    compression : dict
        Feature compression options.
    average : str
        The averaging mode over the centers of interest. Supported options are: ``'off'``,
        ``'inner'`` or ``'outer'``.
    elements : list
        List of atomic numbers or chemical symbols.
    periodic : bool
        Whether to consider periodic boundary conditions.
    sparse : bool
        Whether to return a sparse matrix or a dense array.
    dscribe_n_jobs : int
        Number of jobs used by dscribe to calculate the interaction matrix.
    dscribe_only_physical_cores : bool
        Whether to only use physical cores.
    n_procs : int (optional)
        Number of parallel processes.
    chunksize : int (optional)
        Number of structures handed to each process at once.
    verbose : bool (optional)
        Whether to print a progress bar.
    """

    # Fix: 'rbf' was missing here, so it was never forwarded to
    # calculate_soap_descriptor via ``transform``/``precompute_parameter_space``
    # (ACSF and MBTR include all of their analysis parameters).
    _supported_params = (
        "r_cut",
        "n_max",
        "l_max",
        "sigma",
        "rbf",
        "weighting",
        "compression",
        "average",
        "elements",
        "periodic",
        "sparse",
        "dscribe_n_jobs",
        "dscribe_only_physical_cores",
    )

    def __init__(
        self,
        r_cut=7.5,
        n_max=8,
        l_max=6,
        sigma=1.0,
        rbf="gto",
        weighting=None,
        compression={"mode": "off", "species_weighting": None},
        average="off",
        elements=None,
        periodic=False,
        sparse=False,
        dscribe_n_jobs=1,
        dscribe_only_physical_cores=False,
        n_procs=1,
        chunksize=50,
        verbose=True,
    ):
        """Initialize object."""
        # NOTE(review): the mutable dict default for ``compression`` follows the
        # scikit-learn convention of storing constructor parameters verbatim; it is
        # assumed to never be mutated in place — confirm.
        _BaseStructureTransformer.__init__(self, n_procs, chunksize, verbose)
        self.r_cut = r_cut
        self.n_max = n_max
        self.l_max = l_max
        self.sigma = sigma
        self.rbf = rbf
        self.weighting = weighting
        self.compression = compression
        self.average = average
        self.elements = elements
        self.periodic = periodic
        self.sparse = sparse
        self.dscribe_n_jobs = dscribe_n_jobs
        self.dscribe_only_physical_cores = dscribe_only_physical_cores

    def _get_strct_op_properties(self, label_list, strct_op, input_p):
        return strct_op[label_list].perform_analysis(
            method=calculate_soap_descriptor, kwargs=input_p
        )
class StructureMBTRTransformer(_BaseDscribeTransformer):
    """
    Extract MBTR descriptor as defined in :doi:`10.1088/2632-2153/aca005`.
    This transformer class is based on the implementations of the dscribe python package.

    Attributes
    ----------
    geometry : dict
        Setup the geometry function.
    grid : dict
        Setup the discretization grid.
    weighting : dict
        Setup the weighting function and its parameters.
    normalize_gaussians : bool
        Whether to normalize the gaussians to an area of 1.
    normalization : str
        Method for normalizing. Supported options are ``'none'``, ``'l2'``, ``'n_atoms'``,
        ``'valle_oganov'``.
    elements : list
        List of atomic numbers or chemical symbols.
    periodic : bool
        Whether to consider periodic boundary conditions.
    sparse : bool
        Whether to return a sparse matrix or a dense array.
    dscribe_n_jobs : int
        Number of jobs used by dscribe to calculate the interaction matrix.
    dscribe_only_physical_cores : bool
        Whether to only use physical cores.
    n_procs : int (optional)
        Number of parallel processes.
    chunksize : int (optional)
        Number of structures handed to each process at once.
    verbose : bool (optional)
        Whether to print a progress bar.
    """

    _supported_params = (
        "geometry",
        "grid",
        "weighting",
        "normalize_gaussians",
        "normalization",
        "elements",
        "periodic",
        "sparse",
        "dscribe_n_jobs",
        "dscribe_only_physical_cores",
    )

    def __init__(
        self,
        geometry={"function": "inverse_distance"},
        grid={"min": 0, "max": 1, "n": 100, "sigma": 0.1},
        weighting={"function": "exp", "scale": 1.0, "threshold": 1e-3},
        normalize_gaussians=True,
        normalization="l2",
        elements=None,
        periodic=False,
        sparse=False,
        dscribe_n_jobs=1,
        dscribe_only_physical_cores=False,
        n_procs=1,
        chunksize=50,
        verbose=True,
    ):
        """Initialize object."""
        # _BaseDscribeTransformer defines no __init__, so this resolves to
        # _BaseStructureTransformer.__init__.
        super().__init__(n_procs, chunksize, verbose)
        self.geometry = geometry
        self.grid = grid
        self.weighting = weighting
        self.normalize_gaussians = normalize_gaussians
        self.normalization = normalization
        self.elements = elements
        self.periodic = periodic
        self.sparse = sparse
        self.dscribe_n_jobs = dscribe_n_jobs
        self.dscribe_only_physical_cores = dscribe_only_physical_cores

    def _get_strct_op_properties(self, label_list, strct_op, input_p):
        return strct_op[label_list].perform_analysis(
            method=calculate_mbtr_descriptor, kwargs=input_p
        )