Source code for aim2dat.utils.chem_formula
"""Module to parse representations of chemical formulas into each other."""
# Standard library imports
import re
import math
import functools
import fractions
[docs]
def transform_str_to_dict(formula_str):
    """
    Create a dictionary from a formula string.

    The function supports round, squared and curly brackets (including nested ones) as well as
    recurring elements. Any ``@`` characters are removed before parsing.

    Examples
    --------
    >>> transform_str_to_dict('HOH')
    {'H': 2.0, 'O': 1.0}
    >>> transform_str_to_dict("H.5(CO)CH3{OH[CH]4}3.5")
    {'C': 16.0, 'O': 4.5, 'H': 21.0}

    Parameters
    ----------
    formula_str : str
        Chemical formula as string, e.g. ``Fe2O3``, ``H2O``. If a dictionary is passed it is
        returned unchanged (the same object, not a copy).

    Returns
    -------
    formula_dict : dict
        Chemical formula as dictionary, e.g. ``{'Fe' : 2.0, 'O' : 3.0}`` or
        ``{'H' : 2.0, 'O' : 1.0}``

    Raises
    ------
    TypeError
        If ``formula_str`` is neither a string nor a dictionary.
    """

    def _add_to_dict(formula_dict, key, val):
        # Accumulate so that recurring elements (e.g. both H in 'HOH') are summed up.
        if key in formula_dict:
            formula_dict[key] += val
        else:
            formula_dict[key] = val

    def _get_groups(formula_str):
        """Split the string into top-level ``(start, end, quantity)`` slices."""
        # A closing bracket optionally followed by a quantity such as '2', '3.5' or '.5'.
        grp_qty_pattern = re.compile(r"([\)\]\}](\d*(\.\d+)?))")
        stack = []
        groups = []
        # Index of the first character after the last bracket group *including* its quantity.
        # Keeping one consistent meaning avoids re-parsing the closing bracket or skipping the
        # first character that follows a quantity.
        next_start = 0
        for idx, char in enumerate(formula_str):
            if char in ("(", "[", "{"):
                stack.append(idx)
            elif char in (")", "]", "}"):
                group_st = stack.pop()
                if stack:
                    # Nested bracket: it is resolved by the recursive call on the outer group.
                    continue
                if groups and next_start < group_st:
                    # Plain text located between two top-level bracket groups.
                    groups.append((next_start, group_st, 1.0))
                match = grp_qty_pattern.match(formula_str, idx)
                quantity = 1.0 if match.group(2) == "" else float(match.group(2))
                groups.append((group_st + 1, idx, quantity))
                next_start = match.end()
        if groups and groups[0][0] != 1:
            # Plain text in front of the first top-level bracket group.
            groups.append((0, groups[0][0] - 1, 1.0))
        if groups and next_start != len(formula_str):
            # Plain text behind the last top-level bracket group.
            groups.append((next_start, len(formula_str), 1.0))
        return groups

    if isinstance(formula_str, str):
        formula_dict = {}
        formula_str = formula_str.replace("@", "")
        groups = _get_groups(formula_str)
        if not groups:
            # No brackets left: read consecutive element symbols with optional quantities.
            regex = r"(?P<element>[A-Z][a-z]?)(?P<quantity>\d*(\.\d+)?)?"
            for match in re.finditer(regex, formula_str):
                quantity = 1.0 if match["quantity"] == "" else float(match["quantity"])
                _add_to_dict(formula_dict, match["element"], quantity)
        else:
            # Parse every slice recursively and scale it by the quantity of its group.
            for start, end, quantity in groups:
                group_dict = transform_str_to_dict(formula_str[start:end])
                for key, val in group_dict.items():
                    _add_to_dict(formula_dict, key, val * quantity)
    elif isinstance(formula_str, dict):
        formula_dict = formula_str
    else:
        raise TypeError("Chemical formula has to be of type str or dict.")
    return formula_dict
[docs]
def transform_dict_to_str(formula_dict, output_type=None):
    """
    Create a string from a formula dictionary.

    A quantity equal to 1 is omitted, floats with an integer value are written without decimal
    places (``2.0`` becomes ``2``) and every other quantity is written via ``str()``.

    Parameters
    ----------
    formula_dict : dict
        Chemical formula as dictionary, e.g. ``{'Fe' : 2.0, 'O' : 3.0}`` or
        ``{'H' : 2.0, 'O' : 1.0}``. If a string is passed it is returned unchanged.
    output_type : None or str
        If set to ``'alphabetic'`` the output formula will be alphabetically ordered, otherwise
        the order of the dictionary keys is kept.

    Returns
    -------
    formula_str : str
        Chemical formula as string, e.g. ``Fe2O3``, ``H2O``

    Raises
    ------
    TypeError
        If ``formula_dict`` is neither a dictionary nor a string.
    ValueError
        If a dictionary is passed together with an unsupported ``output_type``.
    """
    if isinstance(formula_dict, str):
        return formula_dict
    if not isinstance(formula_dict, dict):
        raise TypeError("Chemical formula has to be of type str or dict.")

    symbols = list(formula_dict)
    if output_type == "alphabetic":
        symbols.sort()
    elif output_type is not None:
        raise ValueError(f"The output_type `{output_type}` is not supported.")

    def _format(symbol):
        amount = formula_dict[symbol]
        if amount == 1:
            return symbol
        if isinstance(amount, float) and amount.is_integer():
            return symbol + str(int(amount))
        return symbol + str(amount)

    return "".join([_format(symbol) for symbol in symbols])
[docs]
def transform_dict_to_latexstr(formula_dict):
    r"""
    Create a LaTeX string from a formula dictionary, fractional quantities are rounded.

    Every quantity is rounded to the nearest integer first; a rounded quantity of 1 is written
    without subscript.

    Parameters
    ----------
    formula_dict : dict
        Chemical formula as dictionary, e.g. ``{'Fe' : 2.0, 'O' : 3.0}`` or
        ``{'H' : 2.0, 'O' : 1.0}``.

    Returns
    -------
    formula_str : str
        Chemical formula as string with latex formatting, e.g.
        ``r'$\mathrm{Fe}_\mathrm{2}\mathrm{O}_\mathrm{3}$'`` or
        ``r'$\mathrm{H}_\mathrm{2}\mathrm{O}$'``.
    """
    name = []
    for element, quantity in formula_dict.items():
        # Round once and use the same value for the "is it 1?" test and for the subscript, so
        # that e.g. 1.6 is not truncated to 1 (no subscript) while being documented as rounded.
        rounded_qty = int(round(quantity, 0))
        symbol = r"\mathrm{" + str(element) + r"}"
        if rounded_qty == 1:
            name.append(symbol)
        else:
            name.append(symbol + r"_\mathrm{" + str(rounded_qty) + "}")
    return r"$" + r"".join(name) + r"$"
[docs]
def transform_list_to_dict(formula_list):
    """
    Count the occurrences of each element in a list and return them as dictionary.

    Parameters
    ----------
    formula_list : list
        Chemical formula as list, e.g. ``['Fe', 'Fe', 'O', 'O', 'O']`` or ``['H', 'O', 'H']``

    Returns
    -------
    formula_dict : dict
        Chemical formula as dictionary with integer counts, e.g. ``{'Fe' : 2, 'O' : 3}`` or
        ``{'H' : 2, 'O' : 1}``. The keys are ordered by their first occurrence in the list.
    """
    counts = {}
    for symbol in formula_list:
        counts[symbol] = counts.get(symbol, 0) + 1
    return counts
[docs]
def transform_list_to_str(formula_list):
    """
    Convert a list of elements to a formula string.

    The list is first converted via ``transform_list_to_dict`` and the resulting dictionary is
    then passed to ``transform_dict_to_str``.

    Parameters
    ----------
    formula_list : list
        Chemical formula as list, e.g. ``['Fe', 'Fe', 'O', 'O', 'O']`` or ``['H', 'O', 'H']``

    Returns
    -------
    formula_str : str
        Chemical formula as string, e.g. ``Fe2O3``, ``H2O``
    """
    element_counts = transform_list_to_dict(formula_list)
    return transform_dict_to_str(element_counts)
[docs]
def reduce_formula(formula_dict, tolerance=1.0e-4):
    """
    Try to find a reduced formula only having natural numbers as quantities.

    Each quantity is approximated by a fraction whose denominator is at most
    ``floor(1 / tolerance)``. The fractions are scaled to integers by a common multiple of their
    denominators and finally divided by their greatest common divisor.

    Parameters
    ----------
    formula_dict : dict
        Chemical formula as dictionary, e.g. ``{'Fe' : 4.0, 'O' : 6.0}`` or
        ``{'H' : 2.0, 'O' : 1.0}``
    tolerance : float
        Tolerance to determine fractions, e.g., ``0.33333`` is interpreted as 1/3 for a tolerance
        of ``1.0e-4``.

    Returns
    -------
    formula_red : dict
        Chemical formula as dictionary, e.g. ``{'Fe' : 2, 'O' : 3}`` or
        ``{'H' : 2, 'O' : 1}``. A formula with a single element is always reduced to a quantity
        of 1 and an empty formula yields an empty dictionary.
    """
    if not formula_dict:
        # functools.reduce would fail on an empty sequence; nothing to reduce here.
        return {}
    if len(formula_dict) == 1:
        int_values = [1]
    else:
        max_denominator = math.floor(1.0 / tolerance)
        fracts = [
            fractions.Fraction(val).limit_denominator(max_denominator)
            for val in formula_dict.values()
        ]
        # Least common multiple of all denominators.
        factor = 1
        for frac in fracts:
            factor = factor * frac.denominator // math.gcd(factor, frac.denominator)
        # Pure integer arithmetic: ``factor`` is a multiple of every denominator, so the
        # divisions are exact, and no precision is lost for products beyond 2**53 as it would
        # be with float division.
        int_values = [frac.numerator * (factor // frac.denominator) for frac in fracts]
        gcd = functools.reduce(math.gcd, int_values)
        int_values = [val // gcd for val in int_values]
    return dict(zip(formula_dict.keys(), int_values))
[docs]
def compare_formulas(chem_formula1, chem_formula2, reduce_formulas=False):
    """
    Check if two chemical formulas are identical.

    Two formulas are identical if they contain exactly the same elements and the quantities of
    each element compare equal.

    Parameters
    ----------
    chem_formula1 : dict
        Chemical formula as dictionary, e.g. ``{'Fe' : 4.0, 'O' : 6.0}`` or
        ``{'H' : 2.0, 'O' : 1.0}``.
    chem_formula2 : dict
        Chemical formula as dictionary, e.g. ``{'Fe' : 4.0, 'O' : 6.0}`` or
        ``{'H' : 2.0, 'O' : 1.0}``.
    reduce_formulas : bool
        Whether to reduce the formulas (via ``reduce_formula``) before comparison.

    Returns
    -------
    match : bool
        ``True`` if the two formulas are identical
    """
    if reduce_formulas:
        chem_formula1 = reduce_formula(chem_formula1)
        chem_formula2 = reduce_formula(chem_formula2)

    # Elements of the second formula that have not been seen in the first one yet.
    unmatched = set(chem_formula2.keys())
    for element in chem_formula1.keys():
        if element not in chem_formula2:
            return False
        unmatched.discard(element)
        if chem_formula1[element] != chem_formula2[element]:
            return False
    # Anything left over only exists in the second formula.
    return len(unmatched) == 0