Source code for aim2dat.io.qe

"""
Module of functions to read output-files of Quantum ESPRESSO.
"""

# Standard library imports
import re
import xml.etree.ElementTree as ET
import copy

# Third party library imports
import numpy as np

# Internal library imports
from aim2dat.io.utils import read_structure, read_multiple, custom_open
from aim2dat.io.base_parser import transform_str_value
from aim2dat.utils.units import length
from aim2dat.utils.dict_tools import dict_create_tree



[docs]
@read_structure(r".*\.xml", preset_kwargs={"extract_structures": True})
def read_xml(
    file_name: str,
    extract_structures: bool = False,
    strct_type: str = None,
    strct_include: list = None,
):
    """
    Read xml output file.

    Parameters
    ----------
    file_name : str
        Path of the xml-file of Quantum ESPRESSO.
    extract_structures : bool
        Whether to extract alls crystal structures and add them to the output dictionary with the
        key ``'structures'``.
    strct_type : str
        Type of extracted structure(s). Supported options are ``'input'``, ``'output'`` or
        ``'steps'``.
    strct_include : list
        List of dictionary keys that are included in the structure attributes.

    Returns
    -------
    dict
        Output dictionary.
    """

    def _parse_atomic_structure(element):
        outp_dict = {
            "system": {
                "nat": int(element.attrib["nat"]),
                "alat": float(element.attrib["alat"]),
            }
        }
        if "bravais_index" in element.attrib:
            outp_dict["system"]["ibrav"] = int(element.attrib["bravais_index"])
        for c1 in element:
            if c1.tag == "atomic_positions":
                outp_dict["atomic_positions"] = [
                    [c.attrib["name"]] + [float(val) for val in c.text.split()] for c in c1
                ]
            elif c1.tag == "cell":
                outp_dict["cell"] = [[float(val) for val in c.text.split()] for c in c1]
        return outp_dict

    def _parse_kpoints(element):
        if element[0].tag == "nks":
            return {
                "nks": int(element[0].text),
                "k_points": [
                    [float(v) for v in c.text.split()] + [float(c.attrib["weight"])]
                    for c in element[1:]
                ],
            }
        elif element[0].tag == "monkhorst_pack":
            return {
                "automatic": [
                    int(element[0].attrib[k]) for k in ["nk1", "nk2", "nk3", "k1", "k2", "k3"]
                ]
            }

    def _parse_array(element):
        dims = [int(val) for val in reversed(element.attrib["dims"].split())]
        arr = np.array([float(val) for val in element.text.split()]).reshape(dims)
        return [v.tolist() for v in arr]

    def _parse_general_info(element, outp_dict):
        outp_dict["general_info"] = {
            c.tag: c.attrib for c in element if c.tag in ["creator", "created"]
        }

    def _parse_parallel_info(element, outp_dict):
        outp_dict["parallel_info"] = {c.tag: int(c.text) for c in element}

    def _parse_input(element, outp_dict):
        bfgs_mapping = {
            "ndim": "bfgs_ndim",
            "trust_radius_min": "trust_radius_min",
            "trust_radius_max": "trust_radius_max",
            "trust_radius_init": "trust_radius_ini",
            "w1": "w_1",
            "w2": "w_2",
        }
        inp_dict = outp_dict.setdefault("input", {"system": {}})
        for child in element:
            if child.tag == "control_variables":
                inp_dict["control"] = {c.tag: transform_str_value(c.text) for c in child}
            elif child.tag == "atomic_species":
                inp_dict["system"]["ntyp"] = int(child.attrib["ntyp"])
                inp_dict["atomic_species"] = {
                    c.attrib["name"]: (float(c[0].text), c[1].text) for c in child
                }
            elif child.tag == "atomic_structure":
                strct_dict = _parse_atomic_structure(child)
                strct_dict["cell_parameters"] = strct_dict.pop("cell")
                inp_dict["system"].update(strct_dict.pop("system"))
                inp_dict.update(strct_dict)
            elif child.tag == "dft":
                for c1 in child:
                    if c1.tag == "functional":
                        inp_dict["system"]["input_dft"] = c1.text
                    elif c1.tag == "vdW":
                        inp_dict["system"].update({c.tag: transform_str_value(c.text) for c in c1})
            elif child.tag in ["spin", "bands"]:
                inp_dict[child.tag] = {c.tag: transform_str_value(c.text) for c in child}
            elif child.tag == "basis":
                inp_dict["system"].update({c.tag: transform_str_value(c.text) for c in child})
            elif child.tag == "electron_control":
                inp_dict["electrons"] = {c.tag: transform_str_value(c.text) for c in child}
            elif child.tag == "k_points_IBZ":
                inp_dict["k_points"] = _parse_kpoints(child)
            elif child.tag == "ion_control":
                inp_dict["ions"] = {}
                for c1 in child:
                    # TODO take care of molecular dynamics:
                    if c1.tag == "bfgs":
                        inp_dict["ions"].update(
                            {bfgs_mapping[c.tag]: transform_str_value(c.text) for c in c1}
                        )
                    else:
                        inp_dict["ions"][c1.tag] = transform_str_value(c1.text)
            elif child.tag == "cell_control":
                inp_dict["cell"] = {}
                for c1 in child:
                    if c1.tag == "cell_do_free":
                        inp_dict["cell"]["cell_dofree"] = transform_str_value(c1.text)
                    else:
                        inp_dict["cell"][c1.tag] = transform_str_value(c1.text)
            elif child.tag == "symmetry_flags":
                inp_dict["system"].update({c.tag: transform_str_value(c.text) for c in child})
            elif child.tag == "free_positions":
                inp_dict["free_positions"] = _parse_array(child)

    def _parse_step(element, outp_dict):
        step_dict = {}
        for c1 in element:
            if c1.tag == "scf_conv":
                step_dict["scf_conv"] = {c.tag: transform_str_value(c.text) for c in c1}
            elif c1.tag == "atomic_structure":
                step_dict.update(_parse_atomic_structure(c1))
            elif c1.tag == "total_energy":
                step_dict["total_energy"] = {c.tag: float(c.text) for c in c1}
            elif "rank" in c1.attrib and "dims" in c1.attrib:
                step_dict[c1.tag] = _parse_array(c1)
            # elif c1.tag == "stress":

        outp_dict.setdefault("steps", []).append(step_dict)

    def _parse_output(element, outp_dict):
        output = outp_dict.setdefault("output", {})
        for c1 in element:
            if c1.tag == "convergence_info":
                for c2 in c1:
                    output[c2.tag] = {c.tag: transform_str_value(c.text) for c in c2}
            elif c1.tag in [
                "algorithmic_info",
                "boundary_conditions",
                "magnetization",
                "total_energy",
            ]:
                output[c1.tag] = {c.tag: transform_str_value(c.text) for c in c1}
            elif c1.tag == "atomic_structure":
                output.update(_parse_atomic_structure(c1))
            elif c1.tag == "symmetries":
                output["symmetry_operations"] = []
                for c2 in c1:
                    if c2.tag == "symmetry":
                        for c3 in c2:
                            if c3.tag == "rotation":
                                rotation = _parse_array(c3)
                            elif c3.tag == "fractional_translation":
                                translation = [float(val) for val in c3.text.split()]
                        output["symmetry_operations"].append([rotation, translation])
            elif c1.tag == "basis_set":
                output[c1.tag] = {}
                dict_create_tree(outp_dict, ["input", "system"])
                for c2 in c1:
                    if c2.tag == "fft_grid":
                        outp_dict["input"]["system"].update(
                            {f"nr{i}": int(c2.attrib[f"nr{i}"]) for i in range(1, 4)}
                        )
                    elif c2.tag == "fft_smooth":
                        outp_dict["input"]["system"].update(
                            {f"nrs{i}": int(c2.attrib[f"nr{i}"]) for i in range(1, 4)}
                        )
                    elif c2.tag == "reciprocal_lattice":
                        output[c1.tag][c2.tag] = [
                            [float(val) for val in c3.text.split()] for c3 in c2
                        ]
                    else:
                        output[c1.tag][c2.tag] = transform_str_value(c2.text)
            elif c1.tag == "band_structure":
                output[c1.tag] = {}
                for c2 in c1:
                    if c2.tag == "starting_k_points":
                        output[c1.tag][c2.tag] = _parse_kpoints(c2)
                    elif c2.tag == "ks_energies":
                        for c3 in c2:
                            if c3.tag in ["eigenvalues", "occupations"]:
                                output[c1.tag].setdefault(c3.tag, []).append(
                                    [float(val) for val in c3.text.split()]
                                )
                            elif c3.tag == "k_point":
                                output[c1.tag].setdefault("kpoints", []).append(
                                    [float(val) for val in c3.text.split()]
                                )
                                output[c1.tag].setdefault("weights", []).append(
                                    float(c3.attrib["weight"])
                                )
                    else:
                        output[c1.tag][c2.tag] = transform_str_value(c2.text)
            elif c1.tag == "forces":
                output["forces"] = _parse_array(c1)

    def _parse_timing_info(element, outp_dict):
        outp_dict["timing_info"] = {
            c1.attrib["label"]: {c.tag: float(c.text) for c in c1} for c1 in element
        }

    def _extract_structure(inp_dict, label=None, incl=None):
        cell_key = "cell_parameters" if "cell_parameters" in inp_dict else "cell"
        strct_dict = {
            "label": label,
            "elements": [],
            "positions": [],
            "cell": [[v1 * length.Bohr for v1 in v0] for v0 in inp_dict[cell_key]],
            "pbc": True,
            "is_cartesian": True,
            "attributes": {
                k: val
                for k, val in inp_dict.items()
                if k not in ["atomic_positions", cell_key, "forces"]
            },
        }
        if incl is not None:
            strct_dict["attributes"].update(copy.deepcopy(incl))
        for el, p1, p2, p3 in inp_dict["atomic_positions"]:
            strct_dict["elements"].append(el)
            strct_dict["positions"].append((p1 * length.Bohr, p2 * length.Bohr, p3 * length.Bohr))
        if "forces" in inp_dict:
            strct_dict["site_attributes"] = {"forces": [tuple(val) for val in inp_dict["forces"]]}
        return strct_dict

    tree = ET.parse(file_name)
    outp_dict = {}
    for child in tree.getroot():
        if child.tag == "exit_status":
            outp_dict["exit_status"] = int(child.text)
        elif child.tag == "closed":
            pass
        else:
            locals()[f"_parse_{child.tag}"](child, outp_dict)
    if extract_structures:
        if strct_type is None:
            if "steps" in outp_dict:
                strct_type = "steps"
            elif "output" in outp_dict:
                strct_type = "output"
            else:
                strct_type = "input"

        label = (
            ""
            if outp_dict["input"]["control"]["prefix"] is None
            else outp_dict["input"]["control"]["prefix"]
        )
        incl = (
            {}
            if strct_include is None
            else {k: v for k, v in outp_dict.items() if k in strct_include}
        )
        if strct_type == "steps":
            outp_dict["structures"] = [
                _extract_structure(d, label="_".join([label, f"step_{i}"]), incl=incl)
                for i, d in enumerate(outp_dict[strct_type])
            ]
        else:
            outp_dict["structures"] = [
                _extract_structure(
                    outp_dict[strct_type], label="_".join([label, strct_type]), incl=incl
                )
            ]
    return outp_dict




[docs]
@read_structure(r".*\.in(p)?$")
def read_input_structure(file_name):
    """
    Read structure from the Quantum ESPRESSO input file.
    ibrav parameters are not yet fully supported.

    Parameters
    ----------
    file_name : str
        Path of the input-file of Quantum ESPRESSO containing structural data.

    Returns
    -------
    dict
        Dictionary containing the structural information.
    """

    def read_system_namelist(file_content, line_idx):
        patterns = {
            "ibrav": (re.compile(r"^\s*ibrav\s*=\s*(\d)?.*$"), int),
            "A": (re.compile(r"^\s*A\s*=\s*([0-9]*\.?[0-9]*)?.*$"), float),
            "B": (re.compile(r"^\s*B\s*=\s*([0-9]*\.?[0-9]*)?.*$"), float),
            "C": (re.compile(r"^\s*C\s*=\s*([0-9]*\.?[0-9]*)?.*$"), float),
        }
        celldm_pattern = re.compile(r"^\s*celldm\((\d)?\)\s*=\s*([-+]?[0-9]*\.?[0-9]*)?.*$")

        namelist_finished = False
        unit_cell = None
        qe_cell_p = {}
        while not namelist_finished:
            if file_content[line_idx].startswith("!") or file_content[line_idx].startswith("#"):
                line_idx += 1
                continue
            for label, (pattern, match_type) in patterns.items():
                if label not in qe_cell_p:
                    found_match = pattern.match(file_content[line_idx])
                    if found_match is not None:
                        qe_cell_p[label] = match_type(found_match.groups()[0])
            celldm_match = celldm_pattern.match(file_content[line_idx])
            if celldm_match is not None:
                qe_cell_p[int(celldm_match.groups()[0])] = float(celldm_match.groups()[1])
            if "/" in file_content[line_idx]:
                namelist_finished = True
            line_idx += 1
        # TO-DO: implement more ibrav-parameters:
        if qe_cell_p["ibrav"] == 8:
            if 1 in qe_cell_p and 2 in qe_cell_p and 3 in qe_cell_p:
                unit_cell = [
                    [qe_cell_p[1], 0.0, 0.0],
                    [0.0, qe_cell_p[1] * qe_cell_p[2], 0.0],
                    [0.0, 0.0, qe_cell_p[1] * qe_cell_p[3]],
                ]
            elif "A" in qe_cell_p and "B" in qe_cell_p and "C" in qe_cell_p:
                unit_cell = [
                    [qe_cell_p["A"], 0.0, 0.0],
                    [0.0, qe_cell_p["B"], 0.0],
                    [0.0, 0.0, qe_cell_p["C"]],
                ]
            else:
                raise ValueError(f"Could not retrieve unit cell from ibrav {qe_cell_p['ibrav']}.")
        elif qe_cell_p["ibrav"] in (
            1,
            2,
            3,
            -3,
            4,
            5,
            -5,
            6,
            7,
            8,
            9,
            -9,
            91,
            10,
            11,
            12,
            -12,
            13,
            -13,
            14,
        ):
            raise ValueError(f"ibrav {qe_cell_p['ibrav']} not yet implemented...")
        else:
            unit_cell = qe_cell_p[1] if 1 in qe_cell_p else 1.0
        return line_idx, unit_cell

    def read_cell_parameters(file_content, line_idx, conv_factor):
        pattern = re.compile(
            r"^\s*([-+]?[0-9]*\.?[0-9]*)?\s*([-+]?[0-9]*\.?[0-9]*)?"
            r"\s*([-+]?[0-9]*\.?[0-9]*)?\s*$"
        )
        card_finished = False
        unit_cell = []
        while not card_finished:
            match = pattern.match(file_content[line_idx])
            if match is not None and any(match.groups()):
                unit_cell.append([float(m_pos) * conv_factor for m_pos in match.groups()])
            else:
                card_finished = True
            line_idx += 1
        return line_idx, unit_cell

    def read_atomic_positions(file_content, line_idx):
        pattern = re.compile(
            r"^\s*(\w+)?\s*([-+]?[0-9]*\.?[0-9]*)?\s*([-+]?"
            r"[0-9]*\.?[0-9]*)?\s*([-+]?[0-9]*\.?[0-9]*)?.*$"
        )
        card_finished = False
        conv_factor = 1.0
        is_cartesian = True
        positions = []
        elements = []
        if "bohr" in file_content[line_idx - 1].lower():
            conv_factor = length.Bohr
        elif "crystal" in file_content[line_idx - 1].lower():
            is_cartesian = False

        while not card_finished:
            match = pattern.match(file_content[line_idx])
            if match is not None and any(match.groups()):
                try:
                    positions.append([float(m_pos) * conv_factor for m_pos in match.groups()[1:]])
                    elements.append(match.groups()[0])
                except ValueError:
                    card_finished = True
            else:
                card_finished = True
            line_idx += 1
        return line_idx, elements, positions, is_cartesian

    struct_dict = {"pbc": [True, True, True]}
    with custom_open(file_name, "r") as input_file:
        file_content = input_file.read().splitlines()
    line_idx = 0
    while line_idx < len(file_content):  # line in enumerate(file_content):
        if "&SYSTEM" in file_content[line_idx]:
            line_idx, struct_dict["cell"] = read_system_namelist(file_content, line_idx + 1)
        if "CELL_PARAMETERS" in file_content[line_idx] and isinstance(struct_dict["cell"], float):
            conv_factor = struct_dict["cell"]
            if len(file_content[line_idx].split()) > 1:
                if file_content[line_idx].split()[-1].lower() == "bohr":
                    conv_factor = length.Bohr
                elif file_content[line_idx].split()[-1].lower() == "alat":
                    conv_factor *= length.Bohr
            line_idx, struct_dict["cell"] = read_cell_parameters(
                file_content, line_idx + 1, conv_factor
            )
        if "ATOMIC_POSITIONS" in file_content[line_idx]:
            (
                line_idx,
                struct_dict["elements"],
                struct_dict["positions"],
                struct_dict["is_cartesian"],
            ) = read_atomic_positions(file_content, line_idx + 1)
        line_idx += 1
    return struct_dict




[docs]
def read_band_structure(file_name):
    """
    Read band structure file from Quantum ESPRESSO.
    Spin-polarized calculations are not yet supported.

    Parameters
    ----------
    file_name : str
        Path of the output-file of Quantum ESPRESSO containing the band structure.

    Returns
    -------
    band_structure : dict
        Dictionary containing the k-path and th eigenvalues as well as the occupations.
    """
    kpoints = []
    bands = []
    with custom_open(file_name, "r") as bands_file:
        nr_bands = 0
        current_bands = []
        parse_kpoint = True
        for line in bands_file:
            line_split = line.split()
            # Catch the number of bands and k-points at the beginning of the file:
            if line.startswith(" &plot"):
                nr_bands = int(line_split[2][:-1])
                parse_kpoint = True
            # Parse k-point:
            elif parse_kpoint:
                kpoints.append([float(line_split[0]), float(line_split[1]), float(line_split[2])])
                parse_kpoint = False
            else:
                current_bands += [float(eigenvalue) for eigenvalue in line_split]
                if len(current_bands) == nr_bands:
                    parse_kpoint = True
                    bands.append(current_bands)
                    current_bands = []
    return {"kpoints": kpoints, "unit_y": "eV", "bands": bands}




[docs]
def read_total_density_of_states(file_name):
    """
    Read the total density of states from Quantum ESPRESSO.

    Parameters
    ----------
    file_name : str
        Path of the output-file of Quantum ESPRESSO containing the total density of states.

    Returns
    -------
    pdos : dict
        Dictionary containing the projected density of states for each atom.
    """
    energy = []
    tdos = []
    e_fermi = None
    with custom_open(file_name, "r") as tdos_file:
        for line in tdos_file:
            line_split = line.split()
            if not line.startswith("#"):
                energy.append(float(line_split[0]))
                tdos.append(float(line_split[1]))
            else:
                e_fermi = float(line_split[-2])
    return {"energy": energy, "tdos": tdos, "unit_x": "eV", "e_fermi": e_fermi}




[docs]
@read_multiple(
    pattern=r"^.*pdos_atm#(?P<at_idx>\d*)?\((?P<el>[a-zA-Z]*)"
    + r"?\)\_wfc\#(?P<orb_idx>\d*)?\((?P<orb>[a-z])?\)$"
)
def read_atom_proj_density_of_states(folder_path):
    """
    Read the projected density of states from Quantum ESPRESSO.

    Parameters
    ----------
    folder_path : str
        Path to the folder containing the pdos ouput-files.

    Returns
    -------
    pdos : dict
        Dictionary containing the projected density of states for each atom.
    """
    quantum_numbers = {
        "s": ("s"),
        "p": ("px", "py", "pz"),
        "d": ("d-2", "d-1", "d0", "d+1", "d+2"),
        "f": (),  # magnetic qn need to be added here
    }

    energy = []
    atomic_pdos = []
    kind_indices = {}

    indices = [(val, idx) for idx, val in enumerate(folder_path["at_idx"])]
    indices.sort(key=lambda point: point[0])
    _, indices = zip(*indices)
    for idx in indices:
        # Get regex details:
        at_idx = folder_path["at_idx"][idx]
        el = folder_path["el"][idx]
        orb = folder_path["orb"][idx]
        orb_idx = folder_path["orb_idx"][idx]

        # Check which kind the pdos belongs to:
        if at_idx not in kind_indices:
            kind_indices[at_idx] = len(atomic_pdos)
            atomic_pdos.append({"kind": el + "_" + at_idx})

        # The energy is only parsed from the first file, we assume the same energy range:
        parse_energy = len(energy) == 0

        # Read pdos, we only read the orbital contributions here, the summation is performed in
        # the plotting-class:
        with custom_open(folder_path["file"][idx], "r") as pdos_file:

            # Get inof from regex:
            qn_labels = quantum_numbers[orb]

            # Create empty lists for each quantum number:
            for qn_label in qn_labels:
                atomic_pdos[kind_indices[at_idx]][orb_idx + "_" + qn_label] = []

            # Iterate over lines and fill the list:
            for line in pdos_file:
                if not line.startswith("#"):
                    line_split = line.split()
                    if parse_energy:
                        energy.append(float(line_split[0]))
                    for qn_idx in range(len(qn_labels)):
                        qn_label = orb_idx + "_" + qn_labels[qn_idx]

                        # Fix bug in output when exponential is too small, e.g.: 0.292-105 instead
                        # of 0.292E-105
                        float_number = 0.0
                        try:
                            float_number = float(line_split[2 + qn_idx])
                        except ValueError:
                            pass
                        atomic_pdos[kind_indices[at_idx]].setdefault(qn_label, []).append(
                            float_number
                        )

    return {"energy": energy, "pdos": atomic_pdos, "unit_x": "eV"}