Source code for aim2dat.fct.function_comparison

"""Module to analyze spectra."""

# Third party library imports
from scipy import spatial
from scipy import stats
import numpy as np

# Internal library imports
from aim2dat.fct.fingerprint import FunctionDiscretizationFingerprint
from aim2dat.fct.discretization import DiscretizedGrid


class FunctionAnalysis:
    """Class to analyze and compare functions.

    Datasets are registered under a label via ``import_data`` and are
    afterwards referenced by that label in all comparison methods.
    """

    def __init__(self, **kwargs):
        """Initialize object and create the empty internal data storage."""
        super().__init__(**kwargs)
        # Maps data label -> {"x_values": ..., "y_values": ...}
        self._data = {}

    @property
    def allowed_distance_methods(self) -> list:
        """Return allowed distance methods."""
        return ["euclidian", "cosine", "total", "absolute"]

    def import_data(self, data_label: str, x_values: np.ndarray, y_values: np.ndarray):
        """Import data into the internal memory.

        Parameters
        ----------
        data_label : str
            Label for the internal memory.
        x_values : np.ndarray
            x-values of the function.
        y_values : np.ndarray
            y-values of the function.

        Raises
        ------
        ValueError
            If ``data_label`` is already in use.
        """
        if data_label in self._data:
            raise ValueError(f"Key {data_label} already exists.")
        self._data[data_label] = {"x_values": x_values, "y_values": y_values}

    def _return_data(self, label: str) -> dict:
        """Return dataset of the internal memory.

        Parameters
        ----------
        label : str
            Label of dataset.

        Returns
        -------
        dict
            Dictionary containing x- and y-values of the function.

        Raises
        ------
        ValueError
            If no dataset is stored under ``label``.
        """
        if label not in self._data:
            raise ValueError(f"Key {label} does not exist.")
        return self._data[label]

    # NOTE: the misspelled name is kept on purpose so that existing callers
    # and subclasses relying on it keep working.
    def _calulate_distance(self, method: str) -> "callable":
        """Return distance method based on different metrics.

        The method is looked up by name as ``_calculate_<method>_distance``.

        Parameters
        ----------
        method : str
            Distance method to use.

        Returns
        -------
        callable
            Callable distance method.

        Raises
        ------
        AttributeError
            If no distance method with the given name is implemented.
        """
        try:
            return getattr(self, f"_calculate_{method}_distance")
        except AttributeError:
            # Same exception type as the plain attribute lookup, but with a
            # message that tells the user what went wrong and what is valid.
            raise AttributeError(
                f"Distance method '{method}' is not supported, "
                f"allowed methods are: {self.allowed_distance_methods}."
            ) from None

    @staticmethod
    def _difference(data1, data2) -> np.ndarray:
        """Return the element-wise difference ``data1 - data2`` as an array."""
        # ``np.asarray`` allows plain sequences (e.g. lists) as input and is a
        # no-op for data that already is a numpy array.
        return np.asarray(data1) - np.asarray(data2)

    def _calculate_euclidian_distance(self, data1: np.ndarray, data2: np.ndarray) -> float:
        """Calculate the euclidian norm of the difference vector of two vectors.

        Parameters
        ----------
        data1 : np.ndarray
            First dataset.
        data2 : np.ndarray
            Second dataset.

        Returns
        -------
        float
            Euclidian norm.
        """
        return np.linalg.norm(self._difference(data1, data2))

    def _calculate_cosine_distance(self, data1: np.ndarray, data2: np.ndarray) -> float:
        """Calculate the cosine distance between two vectors.

        Parameters
        ----------
        data1 : np.ndarray
            First dataset.
        data2 : np.ndarray
            Second dataset.

        Returns
        -------
        float
            Cosine distance.
        """
        return spatial.distance.cosine(data1, data2)

    def _calculate_total_distance(self, data1: np.ndarray, data2: np.ndarray) -> float:
        """Calculate the summed total difference between two vectors.

        The differences are summed with their sign, positive and negative
        deviations therefore cancel each other.

        Parameters
        ----------
        data1 : np.ndarray
            First dataset.
        data2 : np.ndarray
            Second dataset.

        Returns
        -------
        float
            Total difference.
        """
        return np.sum(self._difference(data1, data2))

    def _calculate_absolute_distance(self, data1: np.ndarray, data2: np.ndarray) -> float:
        """Calculate the summed absolute difference between two vectors.

        Parameters
        ----------
        data1 : np.ndarray
            First dataset.
        data2 : np.ndarray
            Second dataset.

        Returns
        -------
        float
            Absolute difference.
        """
        return np.sum(np.abs(self._difference(data1, data2)))

    def calculate_correlation(self, label1: str, label2: str) -> float:
        """Calculate the pearson-correlation between the values of two functions.

        Only the y-values of the two datasets are used.

        Parameters
        ----------
        label1 : str
            Label of the first dataset.
        label2 : str
            Label of the second dataset.

        Returns
        -------
        float
            The correlation.
        """
        data1 = self._return_data(label1)["y_values"]
        data2 = self._return_data(label2)["y_values"]
        # ``pearsonr`` returns (statistic, p-value); only the statistic is used.
        return stats.pearsonr(data1, data2)[0]

    # ToDo add cross correlation at some point

    def calculate_distance(self, label1: str, label2: str, method: str = "euclidian") -> float:
        """Calculate the distance between the values of two functions.

        Only the y-values of the two datasets are used.

        Parameters
        ----------
        label1 : str
            Label of the first dataset.
        label2 : str
            Label of the second dataset.
        method : str
            The metric to calculate the distance. Defaults to "euclidian".

        Returns
        -------
        float
            The distance according to the chosen metric.
        """
        data1 = self._return_data(label1)["y_values"]
        data2 = self._return_data(label2)["y_values"]
        distance_method = self._calulate_distance(method)
        return distance_method(data1, data2)

    def _calculate_area(self, label: str) -> float:
        """Calculate the enclosed area of a function.

        The area is obtained by integrating the y-values over the x-values
        with the trapezoidal rule.

        Parameters
        ----------
        label : str
            Label of the dataset.

        Returns
        -------
        float
            The enclosed area.
        """
        data = self._return_data(label)
        # ``np.trapz`` is deprecated since numpy 2.0 in favour of
        # ``np.trapezoid``; fall back to the old name on older versions.
        trapezoid = getattr(np, "trapezoid", None)
        if trapezoid is None:
            trapezoid = np.trapz
        return trapezoid(data["y_values"], data["x_values"])

    def compare_areas(self, label1: str, label2: str) -> float:
        """Compare the enclosed areas of two functions.

        Parameters
        ----------
        label1 : str
            Label of the first dataset.
        label2 : str
            Label of the second dataset.

        Returns
        -------
        float
            The ratio of the areas. (A1 / A2)
        """
        area1 = self._calculate_area(label1)
        area2 = self._calculate_area(label2)
        return area1 / area2

    def calculate_discrete_fingerprint(self, label: str, grid: "DiscretizedGrid") -> np.ndarray:
        """
        Calculate a discretized fingerprint of a function
        (:doi:`10.1038/s41597-022-01754-z`).

        Parameters
        ----------
        label : str
            Label of the dataset.
        grid : DiscretizedGrid
            The grid to discretize the function.

        Returns
        -------
        np.ndarray
            Discretized fingerprint.
        """
        data = self._return_data(label)
        spectra_fp = FunctionDiscretizationFingerprint(grid=grid)
        return spectra_fp.calculate_fingerprint(data["x_values"], data["y_values"])

    def compare_functions_by_discrete_fingerprint(
        self, label1: str, label2: str, grid: "DiscretizedGrid"
    ) -> float:
        """
        Compare two functions based on a discretized fingerprint
        (:doi:`10.1038/s41597-022-01754-z`).

        Parameters
        ----------
        label1 : str
            Label of the first dataset.
        label2 : str
            Label of the second dataset.
        grid : DiscretizedGrid
            The grid to discretize the function.

        Returns
        -------
        float
            Similarity.
        """
        data1 = self._return_data(label1)
        data2 = self._return_data(label2)

        spectra_fp = FunctionDiscretizationFingerprint(grid=grid)
        # Both fingerprints are calculated with their label passed along; the
        # return values are not needed here, the comparison below refers to
        # the fingerprints by label.
        spectra_fp.calculate_fingerprint(data1["x_values"], data1["y_values"], label1)
        spectra_fp.calculate_fingerprint(data2["x_values"], data2["y_values"], label2)
        return spectra_fp.compare_fingerprints(label1, label2)