"""An entry set class for automatically building GibbsComputedEntry objects. Some of this
code has been adapted from the EntrySet class in pymatgen.
"""
from __future__ import annotations
import collections
import inspect
from copy import deepcopy
from functools import cached_property
from typing import TYPE_CHECKING
import numpy as np
from monty.json import MontyDecoder, MSONable
from monty.serialization import loadfn
from numpy.random import normal
from pymatgen.analysis.phase_diagram import PhaseDiagram
from pymatgen.core.composition import Element
from pymatgen.entries.computed_entries import ConstantEnergyAdjustment
from pymatgen.entries.entry_tools import EntrySet
from tqdm import tqdm
from rxn_network.core import Composition
from rxn_network.data import PATH_TO_NIST
from rxn_network.entries.corrections import CarbonateCorrection, CarbonDioxideAtmosphericCorrection
from rxn_network.entries.experimental import ExperimentalReferenceEntry
from rxn_network.entries.freed import FREEDReferenceEntry
from rxn_network.entries.gibbs import GibbsComputedEntry
from rxn_network.entries.interpolated import InterpolatedEntry
from rxn_network.entries.nist import NISTReferenceEntry
from rxn_network.thermo.utils import expand_pd
from rxn_network.utils.funcs import get_logger
logger = get_logger(__name__)
# prefer computed data for solids with high melting points (T >= 1500 ºC)
IGNORE_NIST_SOLIDS = loadfn(PATH_TO_NIST / "ignore_solids.json")
if TYPE_CHECKING:
from collections.abc import Iterable
from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry, EnergyAdjustment
[docs]
class GibbsEntrySet(collections.abc.MutableSet, MSONable):
"""This object is based on pymatgen's EntrySet class and includes factory methods for
constructing GibbsComputedEntry objects from "zero-temperature"
ComputedStructureEntry objects. It also offers convenient methods for acquiring
entries from the entry set, whether that be using composition, stability, chemical
system, etc.
"""
def __init__(
self,
entries: Iterable[GibbsComputedEntry | ExperimentalReferenceEntry | InterpolatedEntry],
calculate_e_above_hulls: bool = False,
minimize_obj_size: bool = False,
):
"""The supplied collection of entries will automatically be converted to a set of
unique entries.
Args:
entries: A collection of entry objects that will make up the entry set.
calculate_e_above_hulls: Whether to pre-calculate the energy above hull for
each entry and store that in that entry's data. Defaults to False.
minimize_obj_size: Whether to reduce the size of the entry set by
removing metadata from each entry. This may be useful when working with
extremely large entry sets (or ComputedReaction sets). Defaults to
False.
"""
self.entries = set(entries)
self.calculate_e_above_hulls = calculate_e_above_hulls
self.minimize_obj_size = minimize_obj_size
if minimize_obj_size:
for e in self.entries:
e.parameters = {}
e.data = {}
if calculate_e_above_hulls:
for e in self.entries:
e.data["e_above_hull"] = self.get_e_above_hull(e)
def __contains__(self, item) -> bool:
return item in self.entries
def __iter__(self):
return self.entries.__iter__()
def __len__(self) -> int:
return len(self.entries)
[docs]
def add(self, entry: GibbsComputedEntry | ExperimentalReferenceEntry | InterpolatedEntry) -> None:
"""Add an entry to the set. This is an IN-PLACE method.
Args:
entry: An entry object.
"""
self.entries.add(entry)
self._clear_cache()
[docs]
def update(
self,
entries: Iterable[GibbsComputedEntry | ExperimentalReferenceEntry | InterpolatedEntry],
) -> None:
"""Add an iterable of entries to the set. This is an IN-PLACE method.
Args:
entries: Iterable of entry objects to add to the set.
"""
self.entries.update(entries)
self._clear_cache()
[docs]
def discard(self, entry: GibbsComputedEntry | ExperimentalReferenceEntry) -> None:
"""Discard an entry. This is an IN-PLACE method.
Args:
entry: An entry object.
"""
self.entries.discard(entry)
self._clear_cache()
@cached_property
def pd_dict(self) -> dict:
"""Returns a dictionary of phase diagrams, keyed by the chemical system. This is
acquired using the helper method expand_pd() and represents one of the simplest
divisions of sub-PDs for large chemical systems. Cached for speed.
"""
return expand_pd(self.entries)
[docs]
def get_subset_in_chemsys(self, chemsys: list[str] | str) -> GibbsEntrySet:
"""Returns a GibbsEntrySet containing only the set of entries belonging to
a particular chemical system (including subsystems). For example, if the entries
are from the Li-Fe-P-O system, and chemsys=["Li", "O"], only the Li, O, and Li-O
entries are returned.
Args:
chemsys: Chemical system specified as list of elements. E.g., ["Li", "O"]
Returns: GibbsEntrySet
"""
if isinstance(chemsys, str):
chemsys = chemsys.split("-")
chem_sys = set(chemsys)
if not chem_sys.issubset(self.chemsys):
raise ValueError(f"{chem_sys} is not a subset of {self.chemsys}")
subset = set()
for e in self.entries:
elements = [sp.symbol for sp in e.composition]
if chem_sys.issuperset(elements):
subset.add(e)
return GibbsEntrySet(subset, calculate_e_above_hulls=False)
[docs]
def filter_by_stability(self, e_above_hull: float, include_polymorphs: bool | None = False) -> GibbsEntrySet:
"""Filter the entry set by a metastability (energy above hull) cutoff.
Args:
e_above_hull: Energy above hull, the cutoff describing the allowed
metastability of the entries as determined via phase diagram
construction.
include_polymorphs: optional specification of whether to include
metastable polymorphs. Defaults to False.
Returns:
A new GibbsEntrySet where the entries have been filtered by an energy
cutoff (e_above_hull) via phase diagram construction.
"""
pd_dict = self.pd_dict
filtered_entries: set[GibbsComputedEntry | NISTReferenceEntry] = set()
all_comps: dict[str, GibbsComputedEntry | NISTReferenceEntry] = {}
for pd in pd_dict.values():
for entry in pd.all_entries:
if entry in filtered_entries or pd.get_e_above_hull(entry) > e_above_hull:
continue
formula = entry.composition.reduced_formula
if not include_polymorphs and (formula in all_comps):
if all_comps[formula].energy_per_atom < entry.energy_per_atom:
continue
filtered_entries.remove(all_comps[formula])
all_comps[formula] = entry
filtered_entries.add(entry)
return self.__class__(list(filtered_entries))
[docs]
def build_indices(self) -> None:
"""Builds the indices for the entry set in place. This method is called whenever an
entry is added/removed the entry set. The entry indices are useful for querying
the entry set for specific entries.
Warning: this internally modifies the entries in the entry set by updating data
for each entry to include the index.
Returns:
None
"""
for idx, e in enumerate(self.entries_list):
e.data.update({"idx": idx})
[docs]
def get_min_entry_by_formula(self, formula: str) -> ComputedEntry:
"""Helper method for acquiring the ground state entry with the specified formula.
Args:
formula: The chemical formula of the desired entry.
Returns:
Ground state computed entry object.
"""
return self.min_entries_by_formula[Composition(formula).reduced_formula]
[docs]
def get_stabilized_entry(self, entry: ComputedEntry, tol: float = 1e-3, force=False) -> ComputedEntry:
"""Helper method for lowering the energy of a single entry such that it is just
stable on the phase diagram. If the entry is already stable, it will be
returned unchanged.
Note: if the entry includes the "e_above_hull" data, this value will be used to
stabilize the entry. Otherwise, the energy above hull will be calculated via
creation of a phase diagram; this can take a long time for repeated calls.
Args:
entry: A computed entry object.
tol: The numerical padding added to the energy correction to guarantee
that it is determined to be stable during phase diagram construction.
force: due to numerical stability issues, if the entry is very close to the
hull (i.e., e_above_hull > 0 but very small), it may not be stabilized.
This option forces stabilization even if e_above_hull evalutes to
"zero". This can be crucial for certain edge cases.
Returns:
A new ComputedEntry with energy adjustment making it appear to be stable
with the current entry data (i.e., compositional phase diagram).
"""
e_above_hull = None
if hasattr(entry, "data"):
e_above_hull = entry.data.get("e_above_hull")
if e_above_hull is None:
e_above_hull = self.get_e_above_hull(entry)
if e_above_hull == 0.0 and not force:
new_entry = entry
else:
e_adj = -1 * e_above_hull * entry.composition.num_atoms - tol
adjustment = ConstantEnergyAdjustment(
value=e_adj,
name="Stabilization Adjustment",
description="Shifts energy so that entry is on the convex hull",
)
new_entry = self.get_adjusted_entry(entry, adjustment)
return new_entry
[docs]
def get_entries_with_new_temperature(self, new_temperature: float) -> GibbsEntrySet:
"""Returns a new GibbsEntrySet with entries that have had their energies
modified by using a new temperature.
Note: this will clear the "e_above_hull" data for each entry and re-calculate
them only if the original entry set had them calculated.
"""
new_entries = []
for entry in self.entries_list:
try:
new_entry = entry.get_new_temperature(new_temperature)
except ValueError as e:
logger.warning(f"Could not get new temperature for entry: {entry}. {e}")
continue
new_entry.data["e_above_hull"] = None
new_entries.append(new_entry)
return self.__class__(new_entries, calculate_e_above_hulls=self.calculate_e_above_hulls)
[docs]
def get_entries_with_jitter(self) -> GibbsEntrySet:
"""Returns a new GibbsEntrySet with entries that have had their energies shifted by
randomly sampled noise to account for uncertainty in data. This is done by
sampling from a Gaussian distribution using the entry's "correction_uncertainty"
attribute as the scale.
Returns:
A new GibbsEntrySet with entries that have had their energies shifted by
random Gaussian noise based on their "correction_uncertainty" values.
"""
entries = deepcopy(self.entries_list)
new_entries = []
jitter = normal(size=len(entries))
for idx, entry in enumerate(entries):
if entry.is_element:
continue
adj = ConstantEnergyAdjustment(
value=jitter[idx] * entry.correction_uncertainty,
name="Random jitter",
description=("Randomly sampled (Gaussian) noise to account for uncertainty in data"),
)
new_entries.append(self.get_adjusted_entry(entry, adj))
return GibbsEntrySet(new_entries)
[docs]
def get_interpolated_entry(self, formula: str, tol_per_atom: float = 1e-3) -> ComputedEntry:
"""Helper method for interpolating an entry from the entry set.
Args:
formula: The chemical formula of the desired entry.
tol_per_atom: the energy shift (eV/atom) below the hull energy so that the
interpolated entry is guaranteed to be stable. Defaults to 1 meV/atom.
Returns:
An interpolated GibbsComputedEntry object.
"""
comp = Composition(formula).reduced_composition
pd_entries = self.get_subset_in_chemsys([str(e) for e in comp.elements])
energy = PhaseDiagram(pd_entries).get_hull_energy(comp) - tol_per_atom * comp.num_atoms
adj = ConstantEnergyAdjustment( # for keeping track of uncertainty
value=0.0,
uncertainty=0.05 * comp.num_atoms, # conservative: 50 meV/atom uncertainty
name="Interpolation adjustment (for uncertainty)",
description="Maintains uncertainty due to interpolation",
)
return InterpolatedEntry(
comp,
energy,
energy_adjustments=[adj],
entry_id=f"InterpolatedEntry-{comp.formula}_{self.temperature}",
)
[docs]
def get_e_above_hull(self, entry: ComputedEntry) -> float:
"""Helper method for calculating the energy above hull for a single entry.
Args:
entry: A ComputedEntry object.
Returns:
The energy above hull for the entry.
"""
for chemsys, pd in self.pd_dict.items():
elems_pd = set(chemsys.split("-"))
elems_entry = set(entry.composition.chemical_system.split("-"))
if elems_entry.issubset(elems_pd):
return pd.get_e_above_hull(entry)
raise ValueError("Entry not in any of the phase diagrams in pd_dict!")
[docs]
@classmethod
def from_pd(
cls,
pd: PhaseDiagram,
temperature: float,
include_nist_data: bool = True,
include_freed_data: bool = False,
apply_carbonate_correction: bool = True,
apply_atmospheric_co2_correction: bool = True,
ignore_nist_solids: bool = True,
calculate_e_above_hulls: bool = False,
minimize_obj_size: bool = False,
) -> GibbsEntrySet:
"""Constructor method for building a GibbsEntrySet from an existing phase diagram.
Args:
pd: Phase Diagram object (pymatgen)
temperature: Temperature [K] for determining Gibbs Free Energy of
formation, dGf(T)
include_nist_data: Whether to include NIST data in the entry set. Defaults
to True.
include_freed_data: Whether to include Freed data in the entry set. Defaults
to False. Use at your own risk!
apply_carbonate_correction: Whether to apply the fit energy
correction for carbonates. Defaults to True.
apply_atmospheric_co2_correction: Whether to modify the chemical potential
of CO2 by its partial pressure in the atmosphere (0.04%). Defaults to True.
ignore_nist_solids: Whether to ignore NIST data for the solids specified in
the "data/nist/ignore_solids.json" file; these all have melting points
Tm >= 1500 ºC. Defaults to Ture.
calculate_e_above_hulls: Whether to calculate energy above hull for each
entry and store in the entry's data. Defaults to False.
minimize_obj_size: Whether to minimize the size of the object by removing
unnecessary attributes from the entries. Defaults to False.
Returns:
A GibbsEntrySet containing a collection of GibbsComputedEntry and
experimental reference entry objects at the specified temperature.
"""
gibbs_entries = []
experimental_formulas = []
for entry in pd.all_entries:
composition = entry.composition
formula = composition.reduced_formula
if composition.is_element and entry not in pd.el_refs.values() or formula in experimental_formulas:
continue
new_entries = []
new_entry = None
if include_nist_data:
new_entry = cls._check_for_experimental(
formula, "nist", temperature, ignore_nist_solids, apply_atmospheric_co2_correction
)
if new_entry:
new_entries.append(new_entry)
if include_freed_data:
new_entry = cls._check_for_experimental(
formula, "freed", temperature, ignore_nist_solids, apply_atmospheric_co2_correction
)
if new_entry:
new_entries.append(new_entry)
if new_entry:
experimental_formulas.append(formula)
else:
energy_adjustments = []
if apply_carbonate_correction:
corr = cls._get_carbonate_correction(entry)
if corr is not None:
energy_adjustments.append(corr)
if apply_atmospheric_co2_correction and formula == "CO2":
energy_adjustments.append(
CarbonDioxideAtmosphericCorrection(entry.composition.num_atoms, temperature)
)
structure = entry.structure
formation_energy_per_atom = pd.get_form_energy_per_atom(entry)
gibbs_entry = GibbsComputedEntry.from_structure(
structure=structure,
formation_energy_per_atom=formation_energy_per_atom,
temperature=temperature,
energy_adjustments=energy_adjustments,
parameters=entry.parameters,
data=entry.data,
entry_id=entry.entry_id,
)
new_entries.append(gibbs_entry)
gibbs_entries.extend(new_entries)
return cls(
gibbs_entries,
calculate_e_above_hulls=calculate_e_above_hulls,
minimize_obj_size=minimize_obj_size,
)
[docs]
def copy(self) -> GibbsEntrySet:
"""Returns a copy of the entry set."""
return GibbsEntrySet(entries=self.entries)
[docs]
def as_dict(self) -> dict:
"""Returns a JSON serializable dict representation of the entry set."""
d = super().as_dict()
d["entries"] = [e.as_dict() for e in self.entries]
d["calculate_e_above_hulls"] = self.calculate_e_above_hulls
return d
[docs]
@classmethod
def from_computed_entries(
cls,
entries: Iterable[ComputedStructureEntry],
temperature: float,
include_nist_data: bool = True,
include_freed_data: bool = False,
apply_carbonate_correction: bool = True,
apply_atmospheric_co2_correction: bool = True,
ignore_nist_solids: bool = True,
calculate_e_above_hulls: bool = False,
minimize_obj_size: bool = False,
) -> GibbsEntrySet:
"""Constructor method for initializing GibbsEntrySet from T = 0 K
ComputedStructureEntry objects, as acquired from a thermochemical database
(e.g., The Materials Project).
Automatically expands the phase diagram for large chemical systems (10 or more
elements) to avoid limitations of Qhull.
Args:
entries: Iterable of ComputedStructureEntry objects. These can be downloaded
from The Materials Project API or created manually with pymatgen.
temperature: Temperature [K] for determining Gibbs Free Energy of
formation, dGf(T)
include_nist_data: Whether to include NIST-JANAF data in the entry set.
Defaults to True.
include_freed_data: Whether to include FREED data in the entry set. Defaults
to False. WARNING: This dataset has not been thoroughly tested. Use at
your own risk!
apply_carbonate_correction: Whether to apply the fit GGA energy correction
for carbonates. Defaults to True.
apply_atmospheric_co2_correction: Whether to modify the chemical potential
of CO2 by its partial pressure in the atmosphere (). Defaults to True.
ignore_nist_solids: Whether to ignore NIST data for the solids specified in
the "data/nist/ignore_solids.json" file; these all have melting points
Tm >= 1500 ºC. Defaults to True.
calculate_e_above_hulls: Whether to calculate energy above hull for each
entry and store in the entry's data. Defaults to False.
minimize_obj_size: Whether to minimize the size of the object by removing
unrequired attributes from the entries. Defaults to False.
Returns:
A GibbsEntrySet containing a collection of GibbsComputedEntry and
experimental reference entry objects at the specified temperature.
"""
e_set = EntrySet(entries)
new_entries: set[GibbsComputedEntry] = set()
if len(e_set.chemsys) <= 9: # Qhull algorithm struggles beyond 9 dimensions
pd = PhaseDiagram(e_set)
return cls.from_pd(
pd,
temperature,
include_nist_data=include_nist_data,
include_freed_data=include_freed_data,
apply_carbonate_correction=apply_carbonate_correction,
apply_atmospheric_co2_correction=apply_atmospheric_co2_correction,
ignore_nist_solids=ignore_nist_solids,
calculate_e_above_hulls=calculate_e_above_hulls,
minimize_obj_size=minimize_obj_size,
)
pd_dict = expand_pd(list(e_set))
logger.info("Building entries from expanded phase diagrams...")
for _, pd in tqdm(pd_dict.items(), desc="GibbsComputedEntry"):
gibbs_set = cls.from_pd(
pd,
temperature,
include_nist_data=include_nist_data,
include_freed_data=include_freed_data,
apply_carbonate_correction=apply_carbonate_correction,
apply_atmospheric_co2_correction=apply_atmospheric_co2_correction,
ignore_nist_solids=ignore_nist_solids,
calculate_e_above_hulls=calculate_e_above_hulls,
minimize_obj_size=minimize_obj_size,
)
new_entries.update(gibbs_set)
return cls(list(new_entries))
@cached_property
def entries_list(self) -> list[ComputedEntry]:
"""Returns a list of all entries in the entry set."""
return sorted(self.entries, key=lambda e: e.composition)
@cached_property
def min_entries_by_formula(self) -> dict[str, ComputedEntry]:
"""Returns a dict of minimum energy entries in the entry set, indexed by
formula.
"""
min_entries = {}
for e in self.entries:
formula = e.composition.reduced_formula
if formula not in min_entries:
entries = filter(lambda x: x.composition.reduced_formula == formula, self.entries)
min_entries[formula] = sorted(entries, key=lambda x: x.energy_per_atom)[0]
return min_entries
@cached_property
def temperature(self) -> float:
"""Returns the temperature of entries in the dataset. More precisely, this is the
temperature encountered when iterating over the dataset.
Use at your own risk if mixing GibbsComputedEntry objects calculated at
different temperatures -- this poses even more theoretical problems!
"""
temp = 0.0
for e in self.entries:
if isinstance(e, (ExperimentalReferenceEntry, GibbsComputedEntry)):
temp = e.temperature # get temperature from any entry
break
return temp
@cached_property
def chemsys(self) -> set[str]:
"""Returns:
Set of symbols representing the chemical system, e.g., {"Li", "Fe", "P",
"O"}.
"""
chemsys = set()
for e in self.entries:
chemsys.update([el.symbol for el in e.composition])
return chemsys
[docs]
@staticmethod
def get_adjusted_entry(entry: GibbsComputedEntry, adjustment: EnergyAdjustment) -> GibbsComputedEntry:
"""Gets an entry with an energy adjustment applied.
Args:
entry: A GibbsComputedEntry object.
adjustment: An EnergyAdjustment object to apply to the entry.
Returns:
A new GibbsComputedEntry object with the energy adjustment applied.
"""
entry_dict = entry.as_dict()
original_entry = entry_dict.get("entry", None)
if original_entry:
energy_adjustments = original_entry["energy_adjustments"]
else:
energy_adjustments = entry_dict["energy_adjustments"]
energy_adjustments.append(adjustment.as_dict())
return MontyDecoder().process_decoded(entry_dict)
@staticmethod
def _check_for_experimental(
formula: str,
cls_name: str,
temperature: float,
ignore_nist_solids: bool,
apply_atmospheric_co2_correction: bool,
):
cls_name = cls_name.lower()
if cls_name in ("nist", "nistreferenceentry"):
cl = NISTReferenceEntry
elif cls_name in ("freed", "freedreferenceentry"):
cl = FREEDReferenceEntry
else:
raise ValueError("Invalid class name for experimental reference entry.")
entry = None
if formula in cl.REFERENCES:
if cl == NISTReferenceEntry and ignore_nist_solids and formula in IGNORE_NIST_SOLIDS:
return None
energy_adjustments = None
if apply_atmospheric_co2_correction and formula == "CO2":
energy_adjustments = [CarbonDioxideAtmosphericCorrection(3, temperature)]
try:
entry = cl(
composition=Composition(formula), temperature=temperature, energy_adjustments=energy_adjustments
)
except ValueError as error:
logger.debug(f"Compound {formula} is in {cl} tables but at different temperatures!: {error}")
return entry
@staticmethod
def _get_carbonate_correction(entry):
"""Helper method for determining the carbonate correction for an entry.
WARNING: The standard correction value provided in this module has been fit only
to MP-derived entries (i.e., entries calculated with MPRelaxSet and MPStaticSet
in VASP). Please check that the correction is valid for your entry.
"""
comp = entry.composition
if not {Element("C"), Element("O")}.issubset(comp.elements):
return None
if entry.parameters.get("run_type", None) not in ["GGA", "GGA+U"]:
return None
el_amts = comp.get_el_amt_dict() # now get number of carbonate ions
num_c = el_amts.get("C", 0)
num_o = el_amts.get("O", 0)
if num_c == 0 or num_o == 0:
return None
if not np.isclose(num_o / num_c, 3):
return None
return CarbonateCorrection(num_c)
def _clear_cache(self) -> None:
"""Clears all cached properties. This method is called whenever the entry set is
modified in place (as is done with the add method, etc.).
"""
for name, value in inspect.getmembers(GibbsEntrySet):
if isinstance(value, cached_property):
try:
delattr(self, name)
except AttributeError:
continue