"""
Module implementing the StructureCollection class to handle a set of molecular or
crystalline structures.
"""
# Standard library imports
import re
import copy
from warnings import warn
from typing import Union, List, Tuple, Iterator
# Third party library imports
import pandas as pd
from ase import Atoms
try:
import aiida
except ImportError:
aiida = None
try:
import pymatgen
except ImportError:
pymatgen = None
# Internal library imports
from aim2dat.strct.strct import Structure
from aim2dat.ext_interfaces import _return_ext_interface_modules
import aim2dat.utils.print as utils_pr
import aim2dat.utils.chem_formula as utils_cf
[docs]
class StructureCollection:
"""
The StructureCollection class is a container for one or multiple atomic structures. It
implements several ``import_*`` and ``append_*`` functions to add new data to the object.
Parameters
----------
structures : list
List of ``Structure`` or dict objects.
"""
def __init__(self, structures: Union[List[Union[Structure, dict]], None] = None):
"""Initialize object."""
self._structures = []
if structures is not None:
for strct in structures:
if isinstance(strct, Structure):
self.append_structure(strct)
elif isinstance(strct, dict):
self.append(**strct)
else:
raise TypeError(
"`structures` needs to be a list containing "
"dictionary or Structure objects."
)
@property
def labels(self) -> List[str]:
"""
Labels assigened to the structures.
"""
return [strct.label for strct in self._structures]
[docs]
def index(self, label: str):
"""
Return index of label. If the label is not present, ``None`` is returned.
Parameters
----------
str
Label of the structure.
"""
index = None
for idx, strct in enumerate(self._structures):
if strct.label == label:
index = idx
break
return index
[docs]
def items(self) -> List[Tuple[str, Structure]]:
"""
Return a list of label, value tuples.
"""
return [(strct.label, strct) for strct in self._structures]
[docs]
def pop(self, key: Union[str, int]) -> Structure:
"""
Pop structure.
Parameters
----------
str
Key of the structure.
"""
strct, index, label = self.get_structure(key, True)
del self._structures[index]
return strct
def __str__(self) -> str:
"""
Represent object as string.
"""
def create_structure_summaries(start, end):
strct_list = []
for strct in self[start:end]:
cf_str = utils_cf.transform_dict_to_str(strct.chem_formula)
strct_str = (
strct.label
+ " ".join([""] * (20 - len(strct.label)))
+ " "
+ cf_str
+ " ".join([""] * (20 - len(cf_str)))
)
strct_str += (
" ["
+ " ".join(
str(val) + " ".join([""] * (6 - len(str(val)))) for val in strct.pbc
)
+ "]"
)
strct_list.append(strct_str)
return strct_list
output_str = utils_pr._print_title("Structure Collection") + "\n\n"
output_str += " - Number of structures: " + str(len(self)) + "\n"
output_str += " - Elements: " + "-".join(self.get_all_elements()) + "\n"
output_str += "\n"
output_str += utils_pr._print_subtitle("Structures") + "\n"
if len(self) < 11:
strct_list = create_structure_summaries(0, len(self))
else:
output_str += utils_pr._print_list("", create_structure_summaries(0, 5))
output_str += " ...\n"
strct_list = create_structure_summaries(len(self) - 5, len(self))
output_str += utils_pr._print_list("", strct_list)
output_str += utils_pr._print_hline()
return output_str
def __len__(self) -> int:
"""
Return length of the object.
"""
return len(self._structures)
def __getitem__(
self, key: Union[str, int, tuple, list, slice]
) -> Union[Structure, "StructureCollection"]:
"""
Return structure by key. If a slice, tuple or list of keys is given a
``StructureCollection`` object of the subset is returned.
Parameters
----------
str
Key of the structure(s).
Returns
-------
Structure or StructureCollection
structure or structures.
"""
if isinstance(key, (str, int)):
return self.get_structure(key)
elif isinstance(key, (slice, tuple, list)):
new_sc = StructureCollection()
if isinstance(key, slice):
start = key.start if key.start is not None else 0
if start < 0:
start += len(self)
stop = key.stop if key.stop is not None else len(self)
if stop < 0:
stop += len(self)
key = range(start, stop)
for key0 in key:
new_sc.append_structure(self.get_structure(key0))
return new_sc
else:
raise TypeError("key needs to be of type: str, int, slice, tuple or list.")
def __setitem__(self, key: Union[str, int], value: Union[dict, Structure]):
"""
Set item by index or label.
Parameters
----------
str
Key of the structure.
"""
if isinstance(value, dict):
value = Structure(**value)
self._add_structure(key, value)
def __delitem__(self, key: Union[str, int]):
"""
Delete item by index or label.
Parameters
----------
str
Key of the structure.
"""
self.pop(key)
def __iter__(self) -> Iterator[Structure]:
"""
Iterate through structures.
"""
for strct in self._structures:
yield strct
def __add__(self, other: "StructureCollection") -> "StructureCollection":
"""
Add two objects.
"""
if type(other) is type(self):
new_sc = StructureCollection()
for sc_obj in [self, other]:
for struct in sc_obj._structures:
new_sc.append_structure(struct.copy())
return new_sc
else:
raise TypeError("Can only add objects of type StructureCollection.")
def __deepcopy__(self, memo) -> "StructureCollection":
"""Create a deepcopy of the object."""
copy = StructureCollection()
for strct in self:
copy.append_structure(strct.copy())
memo[id(self)] = copy
return copy
[docs]
def copy(self) -> "StructureCollection":
"""Return copy of ``StructureCollection`` object."""
return copy.deepcopy(self)
[docs]
def append_structure(self, structure: Structure, label: str = None):
"""
Append ``Structure`` object to collection. The label of the structure needs to be
either given via the structures's property or as keyword argument.
Parameters
----------
structure : Structure
Structure object.
label : str (optional)
String used to identify the structure. Overwrites ``label`` property of the structure.
"""
self._add_structure(label, structure, raise_label_error=True)
[docs]
def append(
self,
label: str,
elements: list,
positions: list,
pbc: list,
cell: list = None,
is_cartesian: bool = True,
wrap: bool = False,
kinds: list = None,
attributes: dict = None,
extras: dict = None,
):
"""
Append structure.
Parameters
----------
label : str
String used to identify the structure.
elements : list
List of element symbols or their atomic numbers.
positions : list
List of the atomic positions, either cartesian or scaled coordinates.
pbc : list or bool
Periodic boundary conditions.
cell : list or np.array
Nested 3x3 list of the cell vectors.
is_cartesian : bool (optional)
Whether the coordinates are cartesian or scaled.
wrap : bool (optional)
Wrap atomic positions back into the unit cell.
kinds : list
List of kind names (this allows custom kinds like Ni0, Ni1, ...). If None,
the elements will be used as the kind names.
attributes : dict
Additional information about the structure.
extras : dict
Extras of the structure.
"""
structure = Structure(
label=label,
elements=elements,
positions=positions,
pbc=pbc,
cell=cell,
is_cartesian=is_cartesian,
wrap=wrap,
kinds=kinds,
attributes=attributes,
extras=None,
)
self.append_structure(structure)
[docs]
def append_from_aiida_structuredata(
self,
aiida_node: Union[int, str, "aiida.orm.StructureData"],
use_uuid: bool = False,
label: str = None,
):
"""
Append structure from aiida structuredata.
Parameters
----------
aiida_node : int, str or aiida.orm.StructureData
Primary key, UUID or AiiDA structure node.
use_uuid : bool (optional)
Whether to use the uuid (str) to represent AiiDA nodes instead of the primary key
(int).
label : str
String used to identify the structure. Overwrites ``label`` property of the structure.
"""
self.append_structure(
Structure.from_aiida_structuredata(aiida_node, label=label, use_uuid=use_uuid)
)
[docs]
def append_from_ase_atoms(self, label: str, ase_atoms: Atoms, attributes: dict = None):
"""
Append structure from ase atoms object.
Parameters
----------
label : str
String used to identify the structure.
ase_atoms : ase.Atoms
ase Atoms object.
attributes : dict
Additional information about the structure.
"""
self.append_structure(
Structure.from_ase_atoms(ase_atoms, label=label, attributes=attributes)
)
[docs]
def append_from_pymatgen_structure(
self,
label: str,
pymatgen_structure: Union["pymatgen.core.Molecule", "pymatgen.core.Structure"],
attributes: dict = None,
):
"""
Append structure from pymatgen structure or molecule object.
Parameters
----------
label : str
String used to identify the structure.
pymatgen_structure : pymatgen.core.Structure or pymatgen.core.Molecule
pymatgen structure or molecule object.
attributes : dict
Additional information about the structure.
"""
self.append_structure(
Structure.from_pymatgen_structure(
pymatgen_structure, label=label, attributes=attributes
)
)
[docs]
def append_from_file(
self,
label: str,
file_path: str,
attributes: dict = None,
backend: str = "ase",
backend_kwargs: dict = None,
):
"""
Append structure from file using the ase read-function.
Parameters
----------
label : str
String used to identify the structure.
file_path : str
File path.
attributes : dict
Additional information about the structure.
"""
structure = Structure.from_file(
file_path,
label=label,
attributes=attributes,
backend=backend,
backend_kwargs=backend_kwargs,
)
if isinstance(structure, Structure):
structure = [structure]
for strct in structure:
self.append_structure(strct)
[docs]
def import_from_aiida_db(
self, group_label: str = None, use_uuid: bool = False, raise_error: bool = True
):
"""
Import from the AiiDA database.
Parameters
----------
group_label : str or list (optional)
Constrains query to structures that are member of the group(s).
use_uuid : bool (optional)
Whether to use the uuid (str) to represent AiiDA nodes instead of the primary key
(int).
raise_error : bool (optional)
Whether to raise an error if one of the constraints is not met.
"""
backend_module = _return_ext_interface_modules("aiida")
structure_nodes = []
if not isinstance(group_label, list):
group_label = [group_label]
for gl0 in group_label:
structure_nodes = backend_module._query_structure_nodes(group_label=gl0)
for structure_node in structure_nodes:
structure = Structure.from_aiida_structuredata(structure_node, use_uuid)
if structure.label is None:
structure.label = f"aiida_{len(self)}"
self._add_structure(
key=structure.label,
structure=structure,
raise_label_error=raise_error,
raise_label_warning=True,
)
[docs]
def import_from_pandas_df(
self,
data_frame: pd.DataFrame,
structure_column: str = "optimized_structure",
exclude_columns: list = [],
use_uuid: bool = False,
raise_error: bool = True,
):
"""
Import from pandas data frame.
Parameters
----------
data_frame : pd.DataFrame
Pandas data frame containing at least one column with the AiiDA structure nodes.
structure_column : str (optional)
Column containing AiiDA structure nodes used to determine structural and compositional
properties. The default value is ``'optimized_structure'``.
exclude_columns : list (optional)
Columns of the data frame that are excluded. The default value is ``[]``.
use_uuid : bool (optional)
Whether to use the uuid (str) to represent AiiDA nodes instead of the primary key
(int).
raise_error : bool (optional)
Whether to raise an error if one of the constraints is not met.
"""
label_unit_pattern = re.compile(r"^([\S\s]+)?\s\(([\S\s]+)\)$")
backend_module = _return_ext_interface_modules("aiida")
for _, row in data_frame.iterrows():
structure_node = row.pop(structure_column)
if structure_node is None or structure_node is pd.NA:
continue
structure = Structure.from_aiida_structuredata(structure_node, use_uuid)
if structure.label is None and "parent_node" in row:
structure.label = backend_module._extract_label_from_aiida_node(row["parent_node"])
if "label" in row:
new_label = row.pop("label")
if structure.label is None:
structure.label = new_label
if structure.label is None:
structure.label = f"pandas_{len(self)}"
for label0, value in row.items():
if "el_conc" in label0 or "nr_atoms" in label0:
continue
if label0 in exclude_columns:
continue
match = label_unit_pattern.match(label0)
if match:
structure.set_attribute(
match.groups()[0],
{
"value": data_frame.dtypes[label0].type(value),
"unit": match.groups()[1],
},
)
else:
try:
structure.set_attribute(label0, data_frame.dtypes[label0].type(value))
except TypeError:
continue
self._add_structure(
structure.label, structure, raise_label_warning=True, raise_label_error=raise_error
)
[docs]
def import_from_hdf5_file(self, file_path: str, raise_error: bool = True):
"""
Import from hdf5-file. Calculated extras are not yet supported.
Parameters
------------
file_path : str
File path.
raise_error : bool (optional)
Whether to raise an error if one of the constraints is not met.
"""
backend_module = _return_ext_interface_modules("hdf5")
for structure in backend_module._import_from_hdf5_file(file_path):
self._add_structure(
structure.label,
structure,
raise_label_warning=True,
raise_label_error=False,
)
[docs]
def duplicate_structure(self, key: Union[str, int], new_label: str):
"""
Duplicate structure.
Parameters
----------
key : str or int
Key of the structure.
new_label : str
Label of the copied structure.
"""
self.append_structure(self.get_structure(key), label=new_label)
[docs]
def get_structure(self, key: Union[str, int], return_index_label: bool = False) -> Structure:
"""
Get structure by key.
Parameters
----------
key : str or int
Key of the structure.
Returns
----------
Structure
structure.
"""
# TODO
index, label = self._get_index_label(key)
if index is None or label is None:
structure = None
else:
structure = self._structures[index]
if return_index_label:
return structure, index, label
return structure
[docs]
def get_all_structures(self) -> List[Structure]:
"""
Return a list of all structures.
Returns
-------
list
List of all structures stored in the object.
"""
return [self.get_structure(label) for label in self.labels]
[docs]
def store_in_hdf5_file(self, file_path: str):
"""
Store structures in hdf5-file. Calculated extras are not yet supported.
Parameters
------------
file_path : str
File path.
"""
backend_module = _return_ext_interface_modules("hdf5")
backend_module._store_in_hdf5_file(file_path, self._structures)
[docs]
def store_in_aiida_db(self, group_label: str = None, group_description: str = None):
"""
Store structures into the AiiDA-database.
Parameters
----------
group_label : str (optional)
Label of the AiiDA group.
group_description : str (optional)
Description of the AiiDA group.
Returns
-------
list
List containing dictionary of all structure nodes.
"""
backend_module = _return_ext_interface_modules("aiida")
if group_label is not None:
print(f"Storing data as group `{group_label}` in the AiiDA database.")
if group_description is None:
group_description = "Structures from StructureCollection."
return backend_module._store_data_aiida(group_label, group_description, self._structures)
[docs]
def create_pandas_df(self, exclude_columns: list = []) -> pd.DataFrame:
"""
Create a pandas data frame of the object.
Parameters
----------
exclude_columns : list (optional)
Columns that are not shown in the pandas data frame.
Returns
-------
pandas.DataFrame
Pandas data frame.
"""
backend_module = _return_ext_interface_modules("pandas")
return backend_module._create_strct_c_pandas_df(self, exclude_columns)
[docs]
def get_all_elements(self) -> List[str]:
"""
Get the element symbols of all structures.
Returns
-------
list
List of all element symbols .
"""
all_elements = []
for strct in self:
all_elements += strct["elements"]
return sorted(set(all_elements))
[docs]
def get_all_kinds(self) -> list:
"""
Get the kind strings of all structures.
Returns
-------
list
List of all kinds.
"""
all_kinds = []
for strct in self:
all_kinds += strct.kinds
return sorted(set(all_kinds))
[docs]
def get_all_attribute_keys(self) -> list:
"""
Get all attribute keys.
Returns
-------
list
All attribute keys.
"""
all_attr_keys = []
for strct in self:
all_attr_keys += list(strct.attributes.keys())
return sorted(set(all_attr_keys))
def _get_index_label(self, key: Union[str, int]) -> Tuple[Union[int, None], Union[str, None]]:
if isinstance(key, str):
return self.index(key), key
elif isinstance(key, int):
if key < len(self):
return key, self._structures[key].label
else:
return key, None
return None, None
def _add_structure(
self,
key: Union[str, int],
structure: Structure,
raise_label_warning: bool = False,
raise_label_error: bool = False,
):
if any(id(structure) == id(strct) for strct in self):
structure = structure.copy()
if key is None:
key = structure.label
if isinstance(key, str):
index = self.index(key)
structure.label = key
if raise_label_warning and index is not None:
warn(
f"Index '{index}' is being overwritten.",
UserWarning,
2,
)
elif isinstance(key, int):
if key < len(self):
index = key
label = self._structures[key].label
if raise_label_warning:
warn( # TODO untested
f"Label '{label}' is being overwritten.",
UserWarning,
2,
)
if structure.label is None:
structure.label = label
elif structure.label in self.labels:
raise ValueError(f"Label '{structure.label}' already used.")
else:
raise ValueError(f"Index out of range ({key} >= {len(self)}).")
else:
raise TypeError("`key` needs to be of type int or str.")
if structure.label in self.labels and raise_label_error:
raise ValueError(f"Label '{structure.label}' already used.")
if index is None:
self._structures.append(structure)
else:
self._structures[index] = structure