Source code for atomate2.lobster.schemas

"""Module defining lobster document schemas."""

import gzip
import json
import logging
import time
from pathlib import Path
from typing import Any, Optional, Union

import numpy as np
from emmet.core.structure import StructureMetadata
from monty.dev import requires
from monty.json import MontyDecoder, jsanitize

# TODO: remove this kludge when monty is fixed
from monty.os.path import zpath as monty_zpath
from pydantic import BaseModel, Field
from pymatgen.core import Structure
from pymatgen.electronic_structure.cohp import Cohp, CompleteCohp
from pymatgen.electronic_structure.dos import LobsterCompleteDos
from pymatgen.io.lobster import (
    Bandoverlaps,
    Charge,
    Doscar,
    Grosspop,
    Icohplist,
    Lobsterin,
    Lobsterout,
    MadelungEnergies,
    SitePotential,
)
from typing_extensions import Self

from atomate2 import __version__
from atomate2.utils.datetime import datetime_str

try:
    import ijson
    from lobsterpy.cohp.analyze import Analysis
    from lobsterpy.cohp.describe import Description
except ImportError:
    ijson = None
    Analysis = None
    Description = None


logger = logging.getLogger(__name__)



[docs]
def zpath(pathname: Union[str, Path]) -> str:
    """Kludge to fix monty zpath bug."""
    return monty_zpath(str(pathname))




[docs]
class LobsteroutModel(BaseModel):
    """Definition of computational settings from the LOBSTER computation."""

    restart_from_projection: Optional[bool] = Field(
        None,
        description="Bool indicating if the run has been restarted from a projection",
    )
    lobster_version: Optional[str] = Field(None, description="Lobster version")
    threads: Optional[int] = Field(
        None, description="Number of threads that Lobster ran on"
    )
    dft_program: Optional[str] = Field(
        None, description="DFT program was used for this run"
    )
    charge_spilling: list[float] = Field(description="Absolute charge spilling")
    total_spilling: list[float] = Field(description="Total spilling")
    elements: list[str] = Field(description="Elements in structure")
    basis_type: list[str] = Field(description="Basis set used in Lobster")
    basis_functions: list[list[str]] = Field(description="basis_functions")
    timing: dict[str, dict[str, str]] = Field(description="Dict with infos on timing")
    warning_lines: Optional[list] = Field(None, description="Warnings")
    info_orthonormalization: Optional[list] = Field(
        None, description="additional information on orthonormalization"
    )
    info_lines: Optional[list] = Field(
        None, description="list of strings with additional info lines"
    )
    has_doscar: Optional[bool] = Field(
        None, description="Bool indicating if DOSCAR is present."
    )
    has_doscar_lso: Optional[bool] = Field(
        None, description="Bool indicating if DOSCAR.LSO is present."
    )
    has_cohpcar: Optional[bool] = Field(
        None, description="Bool indicating if COHPCAR is present."
    )
    has_coopcar: Optional[bool] = Field(
        None, description="Bool indicating if COOPCAR is present."
    )
    has_cobicar: Optional[bool] = Field(
        None, description="Bool indicating if COBICAR is present."
    )
    has_charge: Optional[bool] = Field(
        None, description="Bool indicating if CHARGE is present."
    )
    has_madelung: Optional[bool] = Field(
        None,
        description="Bool indicating if Site Potentials and Madelung file is present.",
    )
    has_projection: Optional[bool] = Field(
        None, description="Bool indicating if projection file is present."
    )
    has_bandoverlaps: Optional[bool] = Field(
        None, description="Bool indicating if BANDOVERLAPS file is present"
    )
    has_fatbands: Optional[bool] = Field(
        None, description="Bool indicating if Fatbands are present."
    )
    has_grosspopulation: Optional[bool] = Field(
        None, description="Bool indicating if GROSSPOP file is present."
    )
    has_density_of_energies: Optional[bool] = Field(
        None, description="Bool indicating if DensityofEnergies is present"
    )




[docs]
class LobsterinModel(BaseModel):
    """Definition of input settings for the LOBSTER computation."""

    cohpstartenergy: float = Field(description="Start energy for COHP computation")
    cohpendenergy: float = Field(description="End energy for COHP computation")

    gaussiansmearingwidth: Optional[float] = Field(
        None, description="Set the smearing width in eV,default is 0.2 (eV)"
    )
    usedecimalplaces: Optional[int] = Field(
        None,
        description="Set the decimal places to print in output files, default is 5",
    )
    cohpsteps: Optional[float] = Field(
        None, description="Number steps in COHPCAR; similar to NEDOS of VASP"
    )
    basisset: str = Field(description="basis set of computation")
    cohpgenerator: str = Field(
        description="Build the list of atom pairs to be analyzed using given distance"
    )
    saveprojectiontofile: Optional[bool] = Field(
        None, description="Save the results of projections"
    )
    lsodos: Optional[bool] = Field(
        None, description="Writes DOS output from the orthonormalized LCAO basis"
    )
    basisfunctions: list[str] = Field(
        description="Specify the basis functions for element"
    )




[docs]
class Bonding(BaseModel):
    """Model describing bonding field of BondsInfo."""

    integral: Optional[float] = Field(
        None, description="Integral considering only bonding contributions from COHPs"
    )
    perc: Optional[float] = Field(
        None, description="Percentage of bonding contribution"
    )




[docs]
class Antibonding(BaseModel):
    """Model describing antibonding field of BondsInfo."""

    integral: Optional[float] = Field(
        None,
        description="Integral considering only anti-bonding contributions from COHPs",
    )
    perc: Optional[float] = Field(
        None, description="Percentage of anti-bonding contribution"
    )




[docs]
class BondsInfo(BaseModel):
    """Model describing bonds field of SiteInfo."""

    ICOHP_mean: str = Field(..., description="Mean of ICOHPs of relevant bonds")
    ICOHP_sum: str = Field(..., description="Sum of ICOHPs of relevant bonds")
    has_antibdg_states_below_Efermi: bool = Field(  # noqa: N815
        ...,
        description="Indicates if antibonding interactions below efermi are detected",
    )
    number_of_bonds: int = Field(
        ..., description="Number of bonds considered in the analysis"
    )
    bonding: Bonding = Field(description="Model describing bonding contributions")
    antibonding: Antibonding = Field(
        description="Model describing anti-bonding contributions"
    )




[docs]
class SiteInfo(BaseModel):
    """Outer model describing sites field of Sites model."""

    env: str = Field(
        ...,
        description="The coordination environment identified from "
        "the LobsterPy analysis",
    )
    bonds: dict[str, BondsInfo] = Field(
        ...,
        description="A dictionary with keys as atomic-specie as key "
        "and BondsInfo model as values",
    )
    ion: str = Field(..., description="Ion to which the atom is bonded")
    charge: float = Field(..., description="Mulliken charge of the atom")
    relevant_bonds: list[str] = Field(
        ...,
        description="List of bond labels from the LOBSTER files i.e. for e.g. "
        " from ICOHPLIST.lobster/ COHPCAR.lobster",
    )




[docs]
class Sites(BaseModel):
    """Model describing, sites field of CondensedBondingAnalysis."""

    sites: dict[int, SiteInfo] = Field(
        ...,
        description="A dictionary with site index as keys and SiteInfo model as values",
    )




[docs]
class CohpPlotData(BaseModel):
    """Model describing the cohp_plot_data field of CondensedBondingAnalysis."""

    data: dict[str, Cohp] = Field(
        ...,
        description="A dictionary with plot labels from LobsterPy "
        "automatic analysis as keys and Cohp objects as values",
    )




[docs]
class DictIons(BaseModel):
    """Model describing final_dict_ions field of CondensedBondingAnalysis."""

    data: dict[str, dict[str, int]] = Field(
        ...,
        description="Dict consisting information on environments of cations "
        "and counts for them",
    )




[docs]
class DictBonds(BaseModel):
    """Model describing final_dict_bonds field of CondensedBondingAnalysis."""

    data: dict[str, dict[str, Union[float, bool]]] = Field(
        ..., description="Dict consisting information on ICOHPs per bond type"
    )




[docs]
class CondensedBondingAnalysis(BaseModel):
    """Definition of condensed bonding analysis data from LobsterPy ICOHP."""

    formula: str = Field(description="Pretty formula of the structure")
    max_considered_bond_length: float = Field(
        description="Maximum bond length considered in bonding analysis"
    )
    limit_icohp: list[Union[str, float]] = Field(
        description="ICOHP range considered in co-ordination environment analysis"
    )
    number_of_considered_ions: int = Field(
        ..., description="number of ions detected based on Mulliken/Löwdin Charges"
    )
    sites: Sites = Field(
        ...,
        description="Bonding information at inequivalent sites in the structure",
    )
    type_charges: str = Field(
        description="Charge type considered for assigning valences in bonding analysis"
    )
    cutoff_icohp: float = Field(
        description="Percent limiting the ICOHP values to be considered"
        " relative to strongest ICOHP",
    )
    summed_spins: bool = Field(
        description="Bool that states if the spin channels in the "
        "cohp_plot_data are summed.",
    )
    start: Optional[float] = Field(
        None,
        description="Sets the lower limit of energy relative to Fermi for evaluating"
        " bonding/anti-bonding percentages in the bond"
        " if set to None, all energies up-to the Fermi is considered",
    )
    cohp_plot_data: CohpPlotData = Field(
        ...,
        description="Plotting data for the relevant bonds from LobsterPy analysis",
    )
    which_bonds: str = Field(
        description="Specifies types of bond considered in LobsterPy analysis",
    )
    final_dict_bonds: DictBonds = Field(
        ...,
        description="Dict consisting information on ICOHPs per bond type",
    )
    final_dict_ions: DictIons = Field(
        ...,
        description="Model that describes final_dict_ions field",
    )
    run_time: float = Field(
        ..., description="Time needed to run Lobsterpy condensed bonding analysis"
    )


[docs]
    @classmethod
    def from_directory(
        cls,
        dir_name: Union[str, Path],
        save_cohp_plots: bool = True,
        lobsterpy_kwargs: dict = None,
        plot_kwargs: dict = None,
        which_bonds: str = "all",
    ) -> tuple:
        """Create a task document from a directory containing LOBSTER files.

        Parameters
        ----------
        dir_name : path or str
            The path to the folder containing the calculation outputs.
        save_cohp_plots : bool.
            Bool to indicate whether automatic cohp plots and jsons
            from lobsterpy will be generated.
        lobsterpy_kwargs : dict.
            kwargs to change default lobsterpy automatic analysis parameters.
        plot_kwargs : dict.
            kwargs to change plotting options in lobsterpy.
        which_bonds: str.
            mode for condensed bonding analysis: "cation-anion" and "all".
        """
        plot_kwargs = plot_kwargs or {}
        lobsterpy_kwargs = lobsterpy_kwargs or {}
        dir_name = Path(dir_name)
        cohpcar_path = Path(zpath(str((dir_name / "COHPCAR.lobster").as_posix())))
        charge_path = Path(zpath(str((dir_name / "CHARGE.lobster").as_posix())))
        structure_path = Path(zpath(str((dir_name / "CONTCAR").as_posix())))
        icohplist_path = Path(zpath(str((dir_name / "ICOHPLIST.lobster").as_posix())))
        icobilist_path = Path(zpath(str((dir_name / "ICOBILIST.lobster").as_posix())))
        icooplist_path = Path(zpath(str((dir_name / "ICOOPLIST.lobster").as_posix())))

        # Update lobsterpy analysis parameters with user supplied parameters
        lobsterpy_kwargs_updated = {
            "are_cobis": False,
            "are_coops": False,
            "cutoff_icohp": 0.10,
            "noise_cutoff": 0.1,
            "orbital_cutoff": 0.05,
            "orbital_resolved": False,
            "start": None,
            "summed_spins": False,  # we will always use spin polarization here
            "type_charge": None,
            **lobsterpy_kwargs,
        }

        try:
            start = time.time()
            analyse = Analysis(
                path_to_poscar=structure_path,
                path_to_icohplist=icohplist_path,
                path_to_cohpcar=cohpcar_path,
                path_to_charge=charge_path,
                which_bonds=which_bonds,
                **lobsterpy_kwargs_updated,
            )
            cba_run_time = time.time() - start
            # initialize lobsterpy condensed bonding analysis
            cba = analyse.condensed_bonding_analysis

            cba_cohp_plot_data = {}  # Initialize dict to store plot data

            seq_cohps = analyse.seq_cohps
            seq_labels_cohps = analyse.seq_labels_cohps
            seq_ineq_cations = analyse.seq_ineq_ions
            struct = analyse.structure

            for _iplot, (ication, labels, cohps) in enumerate(
                zip(seq_ineq_cations, seq_labels_cohps, seq_cohps, strict=True)
            ):
                label_str = f"{struct[ication].specie!s}{ication + 1!s}: "
                for label, cohp in zip(labels, cohps, strict=True):
                    if label is not None:
                        cba_cohp_plot_data[label_str + label] = cohp

            describe = Description(analysis_object=analyse)
            limit_icohp_val = list(cba["limit_icohp"])
            _replace_inf_values(limit_icohp_val)

            condensed_bonding_analysis = CondensedBondingAnalysis(
                formula=cba["formula"],
                max_considered_bond_length=cba["max_considered_bond_length"],
                limit_icohp=limit_icohp_val,
                number_of_considered_ions=cba["number_of_considered_ions"],
                sites=Sites(**cba),
                type_charges=analyse.type_charge,
                cohp_plot_data=CohpPlotData(data=cba_cohp_plot_data),
                cutoff_icohp=analyse.cutoff_icohp,
                summed_spins=lobsterpy_kwargs_updated.get("summed_spins"),
                which_bonds=analyse.which_bonds,
                final_dict_bonds=DictBonds(data=analyse.final_dict_bonds),
                final_dict_ions=DictIons(data=analyse.final_dict_ions),
                run_time=cba_run_time,
            )
            if save_cohp_plots:
                describe.plot_cohps(
                    save=True,
                    filename=f"automatic_cohp_plots_{which_bonds}.pdf",
                    hide=True,
                    **plot_kwargs,
                )
                import json

                filename = dir_name / f"condensed_bonding_analysis_{which_bonds}"
                with open(f"{filename}.json", "w") as fp:
                    json.dump(analyse.condensed_bonding_analysis, fp)
                with open(f"{filename}.txt", "w") as fp:
                    for line in describe.text:
                        fp.write(f"{line}\n")

            # Read in strongest icohp values
            sb = _identify_strongest_bonds(
                analyse=analyse,
                icobilist_path=icobilist_path,
                icohplist_path=icohplist_path,
                icooplist_path=icooplist_path,
            )

        except ValueError:
            return None, None, None
        else:
            return condensed_bonding_analysis, describe, sb





[docs]
class DosComparisons(BaseModel):
    """Model describing the DOS comparisons field in the CalcQualitySummary model."""

    tanimoto_orb_s: Optional[float] = Field(
        None,
        description="Tanimoto similarity index between s orbital of "
        "VASP and LOBSTER DOS",
    )
    tanimoto_orb_p: Optional[float] = Field(
        None,
        description="Tanimoto similarity index between p orbital of "
        "VASP and LOBSTER DOS",
    )
    tanimoto_orb_d: Optional[float] = Field(
        None,
        description="Tanimoto similarity index between d orbital of "
        "VASP and LOBSTER DOS",
    )
    tanimoto_orb_f: Optional[float] = Field(
        None,
        description="Tanimoto similarity index between f orbital of "
        "VASP and LOBSTER DOS",
    )
    tanimoto_summed: Optional[float] = Field(
        None,
        description="Tanimoto similarity index for summed PDOS between "
        "VASP and LOBSTER",
    )
    e_range: list[Union[float, None]] = Field(
        description="Energy range used for evaluating the Tanimoto similarity index"
    )
    n_bins: Optional[int] = Field(
        None,
        description="Number of bins used for discretizing the VASP and LOBSTER PDOS"
        "(Affects the Tanimoto similarity index)",
    )




[docs]
class ChargeComparisons(BaseModel):
    """Model describing the charges field in the CalcQualitySummary model."""

    bva_mulliken_agree: Optional[bool] = Field(
        None,
        description="Bool indicating whether atoms classification as cation "
        "or anion based on Mulliken charge signs of LOBSTER "
        "agree with BVA analysis",
    )
    bva_loewdin_agree: Optional[bool] = Field(
        None,
        description="Bool indicating whether atoms classification as cations "
        "or anions based on Loewdin charge signs of LOBSTER "
        "agree with BVA analysis",
    )




[docs]
class BandOverlapsComparisons(BaseModel):
    """Model describing the Band overlaps field in the CalcQualitySummary model."""

    file_exists: bool = Field(
        description="Boolean indicating whether the bandOverlaps.lobster "
        "file is generated during the LOBSTER run",
    )
    limit_maxDeviation: Optional[float] = Field(  # noqa: N815
        None,
        description="Limit set for maximal deviation in pymatgen parser",
    )
    has_good_quality_maxDeviation: Optional[bool] = Field(  # noqa: N815
        None,
        description="Boolean indicating whether the deviation at each k-point "
        "is within the threshold set using limit_maxDeviation "
        "for analyzing the bandOverlaps.lobster file data",
    )
    max_deviation: Optional[float] = Field(
        None,
        description="Maximum deviation from ideal identity matrix from the observed in "
        "the bandOverlaps.lobster file",
    )
    percent_kpoints_abv_limit: Optional[float] = Field(
        None,
        description="Percent of k-points that show deviations above "
        "the limit_maxDeviation threshold set in pymatgen parser.",
    )




[docs]
class ChargeSpilling(BaseModel):
    """Model describing the Charge spilling field in the CalcQualitySummary model."""

    abs_charge_spilling: float = Field(
        description="Absolute charge spilling value from the LOBSTER calculation.",
    )
    abs_total_spilling: float = Field(
        description="Total charge spilling percent from the LOBSTER calculation.",
    )




[docs]
class CalcQualitySummary(BaseModel):
    """Model describing the calculation quality of lobster run."""

    minimal_basis: bool = Field(
        description="Denotes whether the calculation used the minimal basis for the "
        "LOBSTER computation",
    )
    charge_spilling: ChargeSpilling = Field(
        description="Model describing the charge spilling from the LOBSTER runs",
    )
    charge_comparisons: Optional[ChargeComparisons] = Field(
        None,
        description="Model describing the charge sign comparison results",
    )
    band_overlaps_analysis: Optional[BandOverlapsComparisons] = Field(
        None,
        description="Model describing the band overlap file analysis results",
    )
    dos_comparisons: Optional[DosComparisons] = Field(
        None,
        description="Model describing the VASP and LOBSTER PDOS comparisons results",
    )


[docs]
    @classmethod
    @requires(Analysis, "lobsterpy must be installed to create an CalcQualitySummary.")
    def from_directory(
        cls,
        dir_name: Union[Path, str],
        calc_quality_kwargs: dict = None,
    ) -> Self:
        """Make a LOBSTER calculation quality summary from directory with LOBSTER files.

        Parameters
        ----------
        dir_name : path or str
            The path to the folder containing the calculation outputs.
        calc_quality_kwargs : dict
            kwargs to change calc quality analysis options in lobsterpy.

        Returns
        -------
        CalcQualitySummary
            A task document summarizing quality of the lobster calculation.
        """
        dir_name = Path(dir_name)
        calc_quality_kwargs = calc_quality_kwargs or {}
        band_overlaps_path = Path(
            zpath(str((dir_name / "bandOverlaps.lobster").as_posix()))
        )
        charge_path = Path(zpath(str((dir_name / "CHARGE.lobster").as_posix())))
        doscar_path = Path(
            zpath(str((dir_name / "DOSCAR.LSO.lobster").as_posix()))
            if Path(zpath(str((dir_name / "DOSCAR.LSO.lobster").as_posix()))).exists()
            else Path(zpath(str((dir_name / "DOSCAR.lobster").as_posix())))
        )
        lobsterin_path = Path(zpath(str((dir_name / "lobsterin").as_posix())))
        lobsterout_path = Path(zpath(str((dir_name / "lobsterout").as_posix())))
        potcar_path = (
            Path(zpath(str((dir_name / "POTCAR").as_posix())))
            if Path(zpath(str((dir_name / "POTCAR").as_posix()))).exists()
            else None
        )
        structure_path = Path(zpath(str((dir_name / "CONTCAR").as_posix())))
        vasprun_path = Path(zpath(str((dir_name / "vasprun.xml").as_posix())))

        # Update calc quality kwargs supplied by user
        calc_quality_kwargs_updated = {
            "e_range": [-20, 0],
            "dos_comparison": True,
            "n_bins": 256,
            "bva_comp": True,
            **calc_quality_kwargs,
        }

        cal_quality_dict = Analysis.get_lobster_calc_quality_summary(
            path_to_poscar=structure_path,
            path_to_vasprun=vasprun_path,
            path_to_charge=charge_path,
            path_to_potcar=potcar_path,
            path_to_doscar=doscar_path,
            path_to_lobsterin=lobsterin_path,
            path_to_lobsterout=lobsterout_path,
            path_to_bandoverlaps=band_overlaps_path,
            **calc_quality_kwargs_updated,
        )
        return CalcQualitySummary(**cal_quality_dict)





[docs]
class StrongestBonds(BaseModel):
    """Strongest bonds extracted from ICOHPLIST/ICOOPLIST/ICOBILIST from LOBSTER.

    LobsterPy is used for the extraction.
    """

    which_bonds: Optional[str] = Field(
        None,
        description="Denotes whether the information "
        "is for cation-anion pairs or all bonds",
    )
    strongest_bonds_icoop: Optional[dict] = Field(
        None,
        description="Dict with infos on bond strength and bond length based on ICOOP.",
    )
    strongest_bonds_icohp: Optional[dict] = Field(
        None,
        description="Dict with infos on bond strength and bond length based on ICOHP.",
    )
    strongest_bonds_icobi: Optional[dict] = Field(
        None,
        description="Dict with infos on bond strength and bond length based on ICOBI.",
    )




[docs]
class LobsterTaskDocument(StructureMetadata, extra="allow"):  # type: ignore[call-arg]
    """Definition of LOBSTER task document."""

    structure: Structure = Field(description="The structure used in this task")
    dir_name: Union[str, Path] = Field(
        description="The directory for this Lobster task"
    )
    last_updated: str = Field(
        default_factory=datetime_str,
        description="Timestamp for this task document was last updated",
    )
    charges: Optional[Charge] = Field(
        None,
        description="pymatgen Charge obj. Contains atomic charges based on Mulliken "
        "and Loewdin charge analysis",
    )
    lobsterout: LobsteroutModel = Field(description="Lobster out data")
    lobsterin: LobsterinModel = Field(description="Lobster calculation inputs")
    lobsterpy_data: Optional[CondensedBondingAnalysis] = Field(
        None, description="Model describing the LobsterPy data"
    )
    lobsterpy_text: Optional[str] = Field(
        None, description="Stores LobsterPy automatic analysis summary text"
    )
    calc_quality_summary: Optional[CalcQualitySummary] = Field(
        None,
        description="Model summarizing results of lobster runs like charge spillings, "
        "band overlaps, DOS comparisons with VASP runs and quantum chemical LOBSTER "
        "charge sign comparisons with BVA method",
    )
    calc_quality_text: Optional[str] = Field(
        None, description="Stores calculation quality analysis summary text"
    )
    strongest_bonds: Optional[StrongestBonds] = Field(
        None,
        description="Describes the strongest cation-anion ICOOP, ICOBI and ICOHP bonds",
    )
    lobsterpy_data_cation_anion: Optional[CondensedBondingAnalysis] = Field(
        None, description="Model describing the LobsterPy data"
    )
    lobsterpy_text_cation_anion: Optional[str] = Field(
        None,
        description="Stores LobsterPy automatic analysis summary text",
    )
    strongest_bonds_cation_anion: Optional[StrongestBonds] = Field(
        None,
        description="Describes the strongest cation-anion ICOOP, ICOBI and ICOHP bonds",
    )
    dos: Optional[LobsterCompleteDos] = Field(
        None, description="pymatgen pymatgen.io.lobster.Doscar.completedos data"
    )
    lso_dos: Optional[LobsterCompleteDos] = Field(
        None, description="pymatgen pymatgen.io.lobster.Doscar.completedos data"
    )
    madelung_energies: Optional[MadelungEnergies] = Field(
        None,
        description="pymatgen Madelung energies obj. Contains madelung energies"
        "based on Mulliken and Loewdin charges",
    )
    site_potentials: Optional[SitePotential] = Field(
        None,
        description="pymatgen Site potentials obj. Contains site potentials "
        "based on Mulliken and Loewdin charges",
    )
    gross_populations: Optional[Grosspop] = Field(
        None,
        description="pymatgen Grosspopulations obj. Contains gross populations "
        " based on Mulliken and Loewdin charges ",
    )
    band_overlaps: Optional[Bandoverlaps] = Field(
        None,
        description="pymatgen Bandoverlaps obj for each k-point from"
        " bandOverlaps.lobster file if it exists",
    )
    cohp_data: Optional[CompleteCohp] = Field(
        None, description="pymatgen CompleteCohp object with COHP data"
    )
    coop_data: Optional[CompleteCohp] = Field(
        None, description="pymatgen CompleteCohp object with COOP data"
    )
    cobi_data: Optional[CompleteCohp] = Field(
        None, description="pymatgen CompleteCohp object with COBI data"
    )
    icohp_list: Optional[Icohplist] = Field(
        None, description="pymatgen Icohplist object with ICOHP data"
    )
    icoop_list: Optional[Icohplist] = Field(
        None, description="pymatgen Icohplist object with ICOOP data"
    )
    icobi_list: Optional[Icohplist] = Field(
        None, description="pymatgen Icohplist object with ICOBI data"
    )

    atomate2_version: str = Field(
        __version__, description="Version of atomate2 used to create the document"
    )


[docs]
    @classmethod
    @requires(
        Analysis,
        "LobsterTaskDocument requires lobsterpy and ijson to function properly. "
        "Please reinstall atomate2 using atomate2[lobster]",
    )
    def from_directory(
        cls,
        dir_name: Union[Path, str],
        additional_fields: dict = None,
        add_coxxcar_to_task_document: bool = False,
        analyze_outputs: bool = True,
        calc_quality_kwargs: dict = None,
        lobsterpy_kwargs: dict = None,
        plot_kwargs: dict = None,
        store_lso_dos: bool = False,
        save_cohp_plots: bool = True,
        save_cba_jsons: bool = True,
        save_computational_data_jsons: bool = False,
    ) -> Self:
        """Create a task document from a directory containing LOBSTER files.

        Parameters
        ----------
        dir_name : path or str.
            The path to the folder containing the calculation outputs.
        additional_fields : dict.
            Dictionary of additional fields to add to output document.
        add_coxxcar_to_task_document : bool.
            Bool to indicate whether to add COHPCAR, COOPCAR, COBICAR data objects
            to the task document.
        analyze_outputs: bool.
            If True, will enable lobsterpy analysis.
        calc_quality_kwargs : dict.
            kwargs to change calc quality summary options in lobsterpy.
        lobsterpy_kwargs : dict.
            kwargs to change default lobsterpy automatic analysis parameters.
        plot_kwargs : dict.
            kwargs to change plotting options in lobsterpy.
        store_lso_dos : bool.
            Whether to store the LSO DOS.
        save_cohp_plots : bool.
            Bool to indicate whether automatic cohp plots and jsons
            from lobsterpy will be generated.
        save_cba_jsons : bool.
            Bool to indicate whether condensed bonding analysis jsons
            should be saved, consists of outputs from lobsterpy analysis,
            calculation quality summary, lobster dos, charges and madelung energies
        save_computational_data_jsons : bool.
            Bool to indicate whether computational data jsons
            should be saved

        Returns
        -------
        LobsterTaskDocument
            A task document for the lobster calculation.
        """
        additional_fields = {} if additional_fields is None else additional_fields
        dir_name = Path(dir_name)

        # Read in lobsterout and lobsterin
        lobsterout_doc = Lobsterout(
            Path(zpath(str((dir_name / "lobsterout").as_posix())))
        ).get_doc()
        lobster_out = LobsteroutModel(**lobsterout_doc)
        lobster_in = LobsterinModel(
            **Lobsterin.from_file(Path(zpath(str((dir_name / "lobsterin").as_posix()))))
        )

        icohplist_path = Path(zpath(str((dir_name / "ICOHPLIST.lobster").as_posix())))
        icooplist_path = Path(zpath(str((dir_name / "ICOOPLIST.lobster").as_posix())))
        icobilist_path = Path(zpath(str((dir_name / "ICOBILIST.lobster").as_posix())))
        cohpcar_path = Path(zpath(str((dir_name / "COHPCAR.lobster").as_posix())))
        charge_path = Path(zpath(str((dir_name / "CHARGE.lobster").as_posix())))
        cobicar_path = Path(zpath(str((dir_name / "COBICAR.lobster").as_posix())))
        coopcar_path = Path(zpath(str((dir_name / "COOPCAR.lobster").as_posix())))
        doscar_path = Path(zpath(str((dir_name / "DOSCAR.lobster").as_posix())))
        structure_path = Path(zpath(str((dir_name / "CONTCAR").as_posix())))
        madelung_energies_path = Path(
            zpath(str((dir_name / "MadelungEnergies.lobster").as_posix()))
        )
        site_potentials_path = Path(
            zpath(str((dir_name / "SitePotentials.lobster").as_posix()))
        )
        gross_populations_path = Path(
            zpath(str((dir_name / "GROSSPOP.lobster").as_posix()))
        )
        band_overlaps_path = Path(
            zpath(str((dir_name / "bandOverlaps.lobster").as_posix()))
        )

        icohp_list = icoop_list = icobi_list = None
        if icohplist_path.exists():
            icohp_list = Icohplist(filename=icohplist_path)
        if icooplist_path.exists():
            icoop_list = Icohplist(filename=icooplist_path, are_coops=True)
        if icobilist_path.exists():
            icobi_list = Icohplist(filename=icobilist_path, are_cobis=True)

        # Do automatic bonding analysis with LobsterPy
        struct = Structure.from_file(structure_path)

        # will perform two condensed bonding analysis computations
        condensed_bonding_analysis = None
        condensed_bonding_analysis_ionic = None
        sb_all = None
        sb_ionic = None
        calc_quality_summary = None
        calc_quality_text = None
        describe = None
        describe_ionic = None

        if analyze_outputs and (
            icohplist_path.exists() and cohpcar_path.exists() and charge_path.exists()
        ):
            (
                condensed_bonding_analysis,
                describe,
                sb_all,
            ) = CondensedBondingAnalysis.from_directory(
                dir_name,
                save_cohp_plots=save_cohp_plots,
                plot_kwargs=plot_kwargs,
                lobsterpy_kwargs=lobsterpy_kwargs,
                which_bonds="all",
            )
            (
                condensed_bonding_analysis_ionic,
                describe_ionic,
                sb_ionic,
            ) = CondensedBondingAnalysis.from_directory(
                dir_name,
                save_cohp_plots=save_cohp_plots,
                plot_kwargs=plot_kwargs,
                lobsterpy_kwargs=lobsterpy_kwargs,
                which_bonds="cation-anion",
            )
            # Get lobster calculation quality summary data

            calc_quality_summary = CalcQualitySummary.from_directory(
                dir_name,
                calc_quality_kwargs=calc_quality_kwargs,
            )

            calc_quality_text = Description.get_calc_quality_description(
                calc_quality_summary.model_dump()
            )

        # Read in charges
        charges = None
        if charge_path.exists():
            charges = Charge(filename=charge_path)

        # Read in DOS
        dos = None
        if doscar_path.exists():
            doscar_lobster = Doscar(doscar=doscar_path, structure_file=structure_path)
            dos = doscar_lobster.completedos

        # Read in LSO DOS
        lso_dos = None
        doscar_lso_path = Path(zpath(str((dir_name / "DOSCAR.LSO.lobster").as_posix())))
        if store_lso_dos and doscar_lso_path.exists():
            doscar_lso_lobster = Doscar(
                doscar=doscar_lso_path, structure_file=structure_path
            )
            lso_dos = doscar_lso_lobster.completedos

        # Read in Madelung energies
        madelung_energies = None
        if madelung_energies_path.exists():
            madelung_energies = MadelungEnergies(filename=madelung_energies_path)

        # Read in Site Potentials
        site_potentials = None
        if site_potentials_path.exists():
            site_potentials = SitePotential(filename=site_potentials_path)

        # Read in Gross Populations
        gross_populations = None
        if gross_populations_path.exists():
            gross_populations = Grosspop(filename=gross_populations_path)

        # Read in Band overlaps
        band_overlaps = None
        if band_overlaps_path.exists():
            band_overlaps = Bandoverlaps(filename=band_overlaps_path)

        # Read in COHPCAR, COBICAR, COOPCAR
        cohp_obj = None
        coop_obj = None
        cobi_obj = None

        if add_coxxcar_to_task_document:
            if cohpcar_path.exists():
                cohp_obj = CompleteCohp.from_file(
                    fmt="LOBSTER",
                    structure_file=structure_path,
                    filename=cohpcar_path,
                    are_coops=False,
                    are_cobis=False,
                )

            if coopcar_path.exists():
                coop_obj = CompleteCohp.from_file(
                    fmt="LOBSTER",
                    structure_file=structure_path,
                    filename=coopcar_path,
                    are_coops=True,
                    are_cobis=False,
                )

            if cobicar_path.exists():
                cobi_obj = CompleteCohp.from_file(
                    fmt="LOBSTER",
                    structure_file=structure_path,
                    filename=cobicar_path,
                    are_coops=False,
                    are_cobis=True,
                )

        doc = cls.from_structure(
            structure=struct,
            meta_structure=struct,
            dir_name=dir_name,
            lobsterin=lobster_in,
            lobsterout=lobster_out,
            # include additional fields for cation-anion
            lobsterpy_data=condensed_bonding_analysis,
            lobsterpy_text=" ".join(describe.text) if describe is not None else None,
            strongest_bonds=sb_all,
            lobsterpy_data_cation_anion=condensed_bonding_analysis_ionic,
            lobsterpy_text_cation_anion=" ".join(describe_ionic.text)
            if describe_ionic is not None
            else None,
            strongest_bonds_cation_anion=sb_ionic,
            calc_quality_summary=calc_quality_summary
            if calc_quality_summary is not None
            else None,
            calc_quality_text=" ".join(calc_quality_text)
            if calc_quality_text is not None
            else None,
            dos=dos,
            lso_dos=lso_dos,
            charges=charges,
            madelung_energies=madelung_energies,
            site_potentials=site_potentials,
            gross_populations=gross_populations,
            band_overlaps=band_overlaps,
            # include additional fields for all bonds
            cohp_data=cohp_obj,
            coop_data=coop_obj,
            cobi_data=cobi_obj,
            icohp_list=icohp_list,
            icoop_list=icoop_list,
            icobi_list=icobi_list,
        )

        if save_cba_jsons and analyze_outputs:
            cba_json_save_dir = dir_name / "cba.json.gz"
            with gzip.open(cba_json_save_dir, "wt", encoding="UTF-8") as file:
                # Write the json in iterable format
                # (Necessary to load large JSON files via ijson)
                file.write("[")
                if (
                    doc.lobsterpy_data_cation_anion is not None
                ):  # check if cation-anion analysis failed
                    lobsterpy_analysis_type = (
                        doc.lobsterpy_data_cation_anion.which_bonds.replace("-", "_")
                    )
                    data = {
                        f"{lobsterpy_analysis_type}_bonds": {
                            "lobsterpy_data": doc.lobsterpy_data_cation_anion,
                            "lobsterpy_text": [
                                "".join(doc.lobsterpy_text_cation_anion)
                            ],
                            "strongest_bonds": doc.strongest_bonds_cation_anion,
                        }
                    }
                else:
                    data = {"cation_anion_bonds": {}}
                monty_encoded_json_doc = jsanitize(
                    data, allow_bson=True, strict=True, enum_values=True
                )
                json.dump(monty_encoded_json_doc, file)
                file.write(",")
                # add all-bonds data
                lobsterpy_analysis_type = doc.lobsterpy_data.which_bonds
                data = {
                    f"{lobsterpy_analysis_type}_bonds": {
                        "lobsterpy_data": doc.lobsterpy_data,
                        "lobsterpy_text": ["".join(doc.lobsterpy_text)],
                        "strongest_bonds": doc.strongest_bonds,
                    }
                }
                monty_encoded_json_doc = jsanitize(
                    data, allow_bson=True, strict=True, enum_values=True
                )
                json.dump(monty_encoded_json_doc, file)
                file.write(",")
                data = {
                    "madelung_energies": doc.madelung_energies
                }  # add madelung energies
                monty_encoded_json_doc = jsanitize(
                    data, allow_bson=True, strict=True, enum_values=True
                )
                json.dump(monty_encoded_json_doc, file)
                file.write(",")
                data = {"charges": doc.charges}  # add charges
                monty_encoded_json_doc = jsanitize(
                    data, allow_bson=True, strict=True, enum_values=True
                )
                json.dump(monty_encoded_json_doc, file)
                file.write(",")
                data = {
                    "calc_quality_summary": doc.calc_quality_summary
                }  # add calc quality summary dict
                monty_encoded_json_doc = jsanitize(
                    data, allow_bson=True, strict=True, enum_values=True
                )
                json.dump(monty_encoded_json_doc, file)
                file.write(",")
                data = {
                    "calc_quality_text": ["".join(doc.calc_quality_text)]  # type: ignore[dict-item]
                }  # add calc quality summary dict
                monty_encoded_json_doc = jsanitize(
                    data, allow_bson=True, strict=True, enum_values=True
                )
                json.dump(monty_encoded_json_doc, file)
                file.write(",")
                data = {"dos": doc.dos}  # add NON LSO of lobster
                monty_encoded_json_doc = jsanitize(
                    data, allow_bson=True, strict=True, enum_values=True
                )
                json.dump(monty_encoded_json_doc, file)
                file.write(",")
                data = {"lso_dos": doc.lso_dos}  # add LSO DOS of lobster
                monty_encoded_json_doc = jsanitize(
                    data, allow_bson=True, strict=True, enum_values=True
                )
                json.dump(monty_encoded_json_doc, file)
                file.write(",")
                data = {"builder_meta": doc.builder_meta}  # add builder metadata
                monty_encoded_json_doc = jsanitize(
                    data, allow_bson=False, strict=True, enum_values=True
                )
                json.dump(monty_encoded_json_doc, file)
                del data, monty_encoded_json_doc
                file.write("]")

        if save_computational_data_jsons:
            computational_data_json_save_dir = dir_name / "computational_data.json.gz"
            fields_to_exclude = [
                "nsites",
                "elements",
                "nelements",
                "formula_anonymous",
                "chemsys",
                "volume",
                "density",
                "density_atomic",
                "symmetry",
            ]
            # Always add cohp, cobi and coop data to the jsons if files exists
            if cohpcar_path.exists() and doc.cohp_data is None:
                cohp_obj = CompleteCohp.from_file(
                    fmt="LOBSTER",
                    structure_file=structure_path,
                    filename=cohpcar_path,
                    are_coops=False,
                    are_cobis=False,
                )
                doc.cohp_data = cohp_obj

            if coopcar_path.exists() and doc.coop_data is None:
                coop_obj = CompleteCohp.from_file(
                    fmt="LOBSTER",
                    structure_file=structure_path,
                    filename=coopcar_path,
                    are_coops=True,
                    are_cobis=False,
                )
                doc.coop_data = coop_obj

            if cobicar_path.exists() and doc.cobi_data is None:
                cobi_obj = CompleteCohp.from_file(
                    fmt="LOBSTER",
                    structure_file=structure_path,
                    filename=cobicar_path,
                    are_coops=False,
                    are_cobis=True,
                )
                doc.cobi_data = cobi_obj
            with gzip.open(
                computational_data_json_save_dir, "wt", encoding="UTF-8"
            ) as file:
                # Write the json in iterable format
                # (Necessary to load large JSON files via ijson)
                file.write("[")
                for attribute in doc.model_fields:
                    if attribute not in fields_to_exclude:
                        # Use monty encoder to automatically convert pymatgen
                        # objects and other data json compatible dict format
                        data = {
                            attribute: jsanitize(
                                getattr(doc, attribute),
                                allow_bson=False,
                                strict=True,
                                enum_values=True,
                            )
                        }
                        json.dump(data, file)
                        if attribute != list(doc.model_fields.keys())[-1]:
                            file.write(",")  # add comma separator between two dicts
                        del data
                file.write("]")

            # Again unset the cohp, cobi and coop data fields if not desired in the DB
            if not add_coxxcar_to_task_document:
                doc.cohp_data = None
                doc.coop_data = None
                doc.cobi_data = None

        return doc.model_copy(update=additional_fields)




def _replace_inf_values(data: Union[dict[Any, Any], list[Any]]) -> None:
    """
    Replace the -inf value in dictionary and with the string representation '-Infinity'.

    Parameters
    ----------
    data : dict
        dictionary to recursively iterate over

    Returns
    -------
    data
        Dictionary with replaced -inf values.

    """
    if isinstance(data, dict):
        for key, value in data.items():
            if isinstance(value, dict | list):
                _replace_inf_values(
                    value
                )  # Recursively process nested dictionaries and lists
            elif value == float("-inf"):
                data[key] = "-Infinity"  # Replace -inf with a string representation
    elif isinstance(data, list):
        for index, item in enumerate(data):
            if isinstance(item, dict | list):
                _replace_inf_values(
                    item
                )  # Recursively process nested dictionaries and lists
            elif item == float("-inf"):
                data[index] = "-Infinity"  # Replace -inf with a string representation


def _identify_strongest_bonds(
    analyse: Analysis,
    icobilist_path: Path,
    icohplist_path: Path,
    icooplist_path: Path,
) -> StrongestBonds:
    """
    Identify the strongest bonds and convert them into StrongestBonds objects.

    Parameters
    ----------
    analyse : .Analysis
        Analysis object from lobsterpy automatic analysis
    icobilist_path : Path or str
        Path to ICOBILIST.lobster
    icohplist_path : Path or str
        Path to ICOHPLIST.lobster
    icooplist_path : Path or str
        Path to ICOOPLIST.lobster

    Returns
    -------
    StrongestBonds
    """
    data = [
        (icohplist_path, False, False, "icohp"),
        (icobilist_path, True, False, "icobi"),
        (icooplist_path, False, True, "icoop"),
    ]
    output = []
    model_data = {"which_bonds": analyse.which_bonds}
    for file, are_cobis, are_coops, prop in data:
        if file.exists():
            icohplist = Icohplist(
                filename=file,
                are_cobis=are_cobis,
                are_coops=are_coops,
            )
            bond_dict = _get_strong_bonds(
                icohplist.icohpcollection.as_dict(),
                relevant_bonds=analyse.final_dict_bonds,
                are_cobis=are_cobis,
                are_coops=are_coops,
            )
            model_data[f"strongest_bonds_{prop}"] = bond_dict
            output.append(
                StrongestBonds(
                    strongest_bonds=bond_dict,
                    which_bonds=analyse.which_bonds,
                )
            )
        else:
            model_data[f"strongest_bonds_{prop}"] = {}
            output.append(None)
    return StrongestBonds(**model_data)


# Don't we have this in pymatgen somewhere?
def _get_strong_bonds(
    bondlist: dict, are_cobis: bool, are_coops: bool, relevant_bonds: dict
) -> dict:
    """
    Identify the strongest bonds from a list of bonds.

    Parameters
    ----------
    bondlist : dict.
        dict including bonding information
    are_cobis : bool.
        True if these are cobis
    are_coops : bool.
        True if these are coops
    relevant_bonds : dict.
        Dict include all bonds that are considered.

    Returns
    -------
    dict
        Dictionary including the strongest bonds.
    """
    bonds = []
    icohp_all = []
    lengths = []
    for a, b, c, length in zip(
        bondlist["list_atom1"],
        bondlist["list_atom2"],
        bondlist["list_icohp"],
        bondlist["list_length"],
        strict=True,
    ):
        bonds.append(f"{a.rstrip('0123456789')}-{b.rstrip('0123456789')}")
        icohp_all.append(sum(c.values()))
        lengths.append(length)

    bond_labels_unique = list(set(bonds))
    sep_icohp: list[list[float]] = [[] for _ in range(len(bond_labels_unique))]
    sep_lengths: list[list[float]] = [[] for _ in range(len(bond_labels_unique))]

    for idx, val in enumerate(bond_labels_unique):
        for j, val2 in enumerate(bonds):
            if val == val2:
                sep_icohp[idx].append(icohp_all[j])
                sep_lengths[idx].append(lengths[j])

    if are_cobis and not are_coops:
        prop = "icobi"
    elif not are_cobis and are_coops:
        prop = "icoop"
    else:
        prop = "icohp"

    bond_dict: dict[str, dict[str, Union[float, str]]] = {}
    for idx, lab in enumerate(bond_labels_unique):
        label = lab.split("-")
        label.sort()
        for rel_bnd in relevant_bonds:
            rel_bnd_list = rel_bnd.split("-")
            rel_bnd_list.sort()
            if label == rel_bnd_list:
                if prop == "icohp":
                    index = np.argmin(sep_icohp[idx])
                    bond_dict |= {
                        rel_bnd: {
                            "bond_strength": min(sep_icohp[idx]),
                            "length": sep_lengths[idx][index],
                        }
                    }
                else:
                    index = np.argmax(sep_icohp[idx])
                    bond_dict |= {
                        rel_bnd: {
                            "bond_strength": max(sep_icohp[idx]),
                            "length": sep_lengths[idx][index],
                        }
                    }
    return bond_dict



[docs]
def read_saved_json(
    filename: str, pymatgen_objs: bool = True, query: str = "structure"
) -> dict[str, Any]:
    r"""
    Read the data from  \*.json.gz files corresponding to query.

    Uses ijson to parse specific keys(memory efficient)

    Parameters
    ----------
    filename: str.
        name of the json file to read
    pymatgen_objs: bool.
        if True will convert structure,coop, cobi, cohp and dos to pymatgen objects
    query: str or None.
        field name to query from the json file. If None, all data will be returned.

    Returns
    -------
    dict
        Returns a dictionary with lobster task json data corresponding to query.
    """
    with gzip.open(filename, "rb") as file:
        lobster_data = {
            field: data
            for obj in ijson.items(file, "item", use_float=True)
            for field, data in obj.items()
            if query is None or query in obj
        }
        if not lobster_data:
            raise ValueError(
                "Please recheck the query argument. "
                f"No data associated to the requested 'query={query}' "
                f"found in the JSON file"
            )
    if pymatgen_objs:
        for query_key, value in lobster_data.items():
            if isinstance(value, dict):
                lobster_data[query_key] = MontyDecoder().process_decoded(value)
            elif "lobsterpy_data" in query_key:
                for field in lobster_data[query_key].__fields__:
                    val = MontyDecoder().process_decoded(
                        getattr(lobster_data[query_key], field)
                    )
                    setattr(lobster_data[query_key], field, val)

    return lobster_data