"""An entry set class for automatically building GibbsComputedEntry objects. Some of thiscode has been adapted from the EntrySet class in pymatgen."""from__future__importannotationsimportcollectionsimportinspectfromcopyimportdeepcopyfromfunctoolsimportcached_propertyfromtypingimportTYPE_CHECKINGimportnumpyasnpfrommonty.jsonimportMontyDecoder,MSONablefrommonty.serializationimportloadfnfromnumpy.randomimportnormalfrompymatgen.analysis.phase_diagramimportPhaseDiagramfrompymatgen.core.compositionimportElementfrompymatgen.entries.computed_entriesimportConstantEnergyAdjustmentfrompymatgen.entries.entry_toolsimportEntrySetfromtqdmimporttqdmfromrxn_network.coreimportCompositionfromrxn_network.dataimportPATH_TO_NISTfromrxn_network.entries.correctionsimportCarbonateCorrection,CarbonDioxideAtmosphericCorrectionfromrxn_network.entries.experimentalimportExperimentalReferenceEntryfromrxn_network.entries.freedimportFREEDReferenceEntryfromrxn_network.entries.gibbsimportGibbsComputedEntryfromrxn_network.entries.interpolatedimportInterpolatedEntryfromrxn_network.entries.nistimportNISTReferenceEntryfromrxn_network.thermo.utilsimportexpand_pdfromrxn_network.utils.funcsimportget_loggerlogger=get_logger(__name__)# prefer computed data for solids with high melting points (T >= 1500 ºC)IGNORE_NIST_SOLIDS=loadfn(PATH_TO_NIST/"ignore_solids.json")ifTYPE_CHECKING:fromcollections.abcimportIterablefrompymatgen.entries.computed_entriesimportComputedEntry,ComputedStructureEntry,EnergyAdjustment
[docs]classGibbsEntrySet(collections.abc.MutableSet,MSONable):"""This object is based on pymatgen's EntrySet class and includes factory methods for constructing GibbsComputedEntry objects from "zero-temperature" ComputedStructureEntry objects. It also offers convenient methods for acquiring entries from the entry set, whether that be using composition, stability, chemical system, etc. """def__init__(self,entries:Iterable[GibbsComputedEntry|ExperimentalReferenceEntry|InterpolatedEntry],calculate_e_above_hulls:bool=False,minimize_obj_size:bool=False,):"""The supplied collection of entries will automatically be converted to a set of unique entries. Args: entries: A collection of entry objects that will make up the entry set. calculate_e_above_hulls: Whether to pre-calculate the energy above hull for each entry and store that in that entry's data. Defaults to False. minimize_obj_size: Whether to reduce the size of the entry set by removing metadata from each entry. This may be useful when working with extremely large entry sets (or ComputedReaction sets). Defaults to False. """self.entries=set(entries)self.calculate_e_above_hulls=calculate_e_above_hullsself.minimize_obj_size=minimize_obj_sizeifminimize_obj_size:foreinself.entries:e.parameters={}e.data={}ifcalculate_e_above_hulls:foreinself.entries:e.data["e_above_hull"]=self.get_e_above_hull(e)def__contains__(self,item)->bool:returniteminself.entriesdef__iter__(self):returnself.entries.__iter__()def__len__(self)->int:returnlen(self.entries)
[docs]defadd(self,entry:GibbsComputedEntry|ExperimentalReferenceEntry|InterpolatedEntry)->None:"""Add an entry to the set. This is an IN-PLACE method. Args: entry: An entry object. """self.entries.add(entry)self._clear_cache()
[docs]defupdate(self,entries:Iterable[GibbsComputedEntry|ExperimentalReferenceEntry|InterpolatedEntry],)->None:"""Add an iterable of entries to the set. This is an IN-PLACE method. Args: entries: Iterable of entry objects to add to the set. """self.entries.update(entries)self._clear_cache()
[docs]defdiscard(self,entry:GibbsComputedEntry|ExperimentalReferenceEntry)->None:"""Discard an entry. This is an IN-PLACE method. Args: entry: An entry object. """self.entries.discard(entry)self._clear_cache()
@cached_propertydefpd_dict(self)->dict:"""Returns a dictionary of phase diagrams, keyed by the chemical system. This is acquired using the helper method expand_pd() and represents one of the simplest divisions of sub-PDs for large chemical systems. Cached for speed. """returnexpand_pd(self.entries)
[docs]defget_subset_in_chemsys(self,chemsys:list[str]|str)->GibbsEntrySet:"""Returns a GibbsEntrySet containing only the set of entries belonging to a particular chemical system (including subsystems). For example, if the entries are from the Li-Fe-P-O system, and chemsys=["Li", "O"], only the Li, O, and Li-O entries are returned. Args: chemsys: Chemical system specified as list of elements. E.g., ["Li", "O"] Returns: GibbsEntrySet """ifisinstance(chemsys,str):chemsys=chemsys.split("-")chem_sys=set(chemsys)ifnotchem_sys.issubset(self.chemsys):raiseValueError(f"{chem_sys} is not a subset of {self.chemsys}")subset=set()foreinself.entries:elements=[sp.symbolforspine.composition]ifchem_sys.issuperset(elements):subset.add(e)returnGibbsEntrySet(subset,calculate_e_above_hulls=False)
[docs]deffilter_by_stability(self,e_above_hull:float,include_polymorphs:bool|None=False)->GibbsEntrySet:"""Filter the entry set by a metastability (energy above hull) cutoff. Args: e_above_hull: Energy above hull, the cutoff describing the allowed metastability of the entries as determined via phase diagram construction. include_polymorphs: optional specification of whether to include metastable polymorphs. Defaults to False. Returns: A new GibbsEntrySet where the entries have been filtered by an energy cutoff (e_above_hull) via phase diagram construction. """pd_dict=self.pd_dictfiltered_entries:set[GibbsComputedEntry|NISTReferenceEntry]=set()all_comps:dict[str,GibbsComputedEntry|NISTReferenceEntry]={}forpdinpd_dict.values():forentryinpd.all_entries:ifentryinfiltered_entriesorpd.get_e_above_hull(entry)>e_above_hull:continueformula=entry.composition.reduced_formulaifnotinclude_polymorphsand(formulainall_comps):ifall_comps[formula].energy_per_atom<entry.energy_per_atom:continuefiltered_entries.remove(all_comps[formula])all_comps[formula]=entryfiltered_entries.add(entry)returnself.__class__(list(filtered_entries))
[docs]defbuild_indices(self)->None:"""Builds the indices for the entry set in place. This method is called whenever an entry is added/removed the entry set. The entry indices are useful for querying the entry set for specific entries. Warning: this internally modifies the entries in the entry set by updating data for each entry to include the index. Returns: None """foridx,einenumerate(self.entries_list):e.data.update({"idx":idx})
[docs]defget_min_entry_by_formula(self,formula:str)->ComputedEntry:"""Helper method for acquiring the ground state entry with the specified formula. Args: formula: The chemical formula of the desired entry. Returns: Ground state computed entry object. """returnself.min_entries_by_formula[Composition(formula).reduced_formula]
[docs]defget_stabilized_entry(self,entry:ComputedEntry,tol:float=1e-3,force=False)->ComputedEntry:"""Helper method for lowering the energy of a single entry such that it is just stable on the phase diagram. If the entry is already stable, it will be returned unchanged. Note: if the entry includes the "e_above_hull" data, this value will be used to stabilize the entry. Otherwise, the energy above hull will be calculated via creation of a phase diagram; this can take a long time for repeated calls. Args: entry: A computed entry object. tol: The numerical padding added to the energy correction to guarantee that it is determined to be stable during phase diagram construction. force: due to numerical stability issues, if the entry is very close to the hull (i.e., e_above_hull > 0 but very small), it may not be stabilized. This option forces stabilization even if e_above_hull evalutes to "zero". This can be crucial for certain edge cases. Returns: A new ComputedEntry with energy adjustment making it appear to be stable with the current entry data (i.e., compositional phase diagram). """e_above_hull=Noneifhasattr(entry,"data"):e_above_hull=entry.data.get("e_above_hull")ife_above_hullisNone:e_above_hull=self.get_e_above_hull(entry)ife_above_hull==0.0andnotforce:new_entry=entryelse:e_adj=-1*e_above_hull*entry.composition.num_atoms-toladjustment=ConstantEnergyAdjustment(value=e_adj,name="Stabilization Adjustment",description="Shifts energy so that entry is on the convex hull",)new_entry=self.get_adjusted_entry(entry,adjustment)returnnew_entry
[docs]defget_entries_with_new_temperature(self,new_temperature:float)->GibbsEntrySet:"""Returns a new GibbsEntrySet with entries that have had their energies modified by using a new temperature. Note: this will clear the "e_above_hull" data for each entry and re-calculate them only if the original entry set had them calculated. """new_entries=[]forentryinself.entries_list:try:new_entry=entry.get_new_temperature(new_temperature)exceptValueErrorase:logger.warning(f"Could not get new temperature for entry: {entry}. {e}")continuenew_entry.data["e_above_hull"]=Nonenew_entries.append(new_entry)returnself.__class__(new_entries,calculate_e_above_hulls=self.calculate_e_above_hulls)
[docs]defget_entries_with_jitter(self)->GibbsEntrySet:"""Returns a new GibbsEntrySet with entries that have had their energies shifted by randomly sampled noise to account for uncertainty in data. This is done by sampling from a Gaussian distribution using the entry's "correction_uncertainty" attribute as the scale. Returns: A new GibbsEntrySet with entries that have had their energies shifted by random Gaussian noise based on their "correction_uncertainty" values. """entries=deepcopy(self.entries_list)new_entries=[]jitter=normal(size=len(entries))foridx,entryinenumerate(entries):ifentry.is_element:continueadj=ConstantEnergyAdjustment(value=jitter[idx]*entry.correction_uncertainty,name="Random jitter",description=("Randomly sampled (Gaussian) noise to account for uncertainty in data"),)new_entries.append(self.get_adjusted_entry(entry,adj))returnGibbsEntrySet(new_entries)
[docs]defget_interpolated_entry(self,formula:str,tol_per_atom:float=1e-3)->ComputedEntry:"""Helper method for interpolating an entry from the entry set. Args: formula: The chemical formula of the desired entry. tol_per_atom: the energy shift (eV/atom) below the hull energy so that the interpolated entry is guaranteed to be stable. Defaults to 1 meV/atom. Returns: An interpolated GibbsComputedEntry object. """comp=Composition(formula).reduced_compositionpd_entries=self.get_subset_in_chemsys([str(e)foreincomp.elements])energy=PhaseDiagram(pd_entries).get_hull_energy(comp)-tol_per_atom*comp.num_atomsadj=ConstantEnergyAdjustment(# for keeping track of uncertaintyvalue=0.0,uncertainty=0.05*comp.num_atoms,# conservative: 50 meV/atom uncertaintyname="Interpolation adjustment (for uncertainty)",description="Maintains uncertainty due to interpolation",)returnInterpolatedEntry(comp,energy,energy_adjustments=[adj],entry_id=f"InterpolatedEntry-{comp.formula}_{self.temperature}",)
[docs]defget_e_above_hull(self,entry:ComputedEntry)->float:"""Helper method for calculating the energy above hull for a single entry. Args: entry: A ComputedEntry object. Returns: The energy above hull for the entry. """forchemsys,pdinself.pd_dict.items():elems_pd=set(chemsys.split("-"))elems_entry=set(entry.composition.chemical_system.split("-"))ifelems_entry.issubset(elems_pd):returnpd.get_e_above_hull(entry)raiseValueError("Entry not in any of the phase diagrams in pd_dict!")
[docs]@classmethoddeffrom_pd(cls,pd:PhaseDiagram,temperature:float,include_nist_data:bool=True,include_freed_data:bool=False,apply_carbonate_correction:bool=True,apply_atmospheric_co2_correction:bool=True,ignore_nist_solids:bool=True,calculate_e_above_hulls:bool=False,minimize_obj_size:bool=False,)->GibbsEntrySet:"""Constructor method for building a GibbsEntrySet from an existing phase diagram. Args: pd: Phase Diagram object (pymatgen) temperature: Temperature [K] for determining Gibbs Free Energy of formation, dGf(T) include_nist_data: Whether to include NIST data in the entry set. Defaults to True. include_freed_data: Whether to include Freed data in the entry set. Defaults to False. Use at your own risk! apply_carbonate_correction: Whether to apply the fit energy correction for carbonates. Defaults to True. apply_atmospheric_co2_correction: Whether to modify the chemical potential of CO2 by its partial pressure in the atmosphere (0.04%). Defaults to True. ignore_nist_solids: Whether to ignore NIST data for the solids specified in the "data/nist/ignore_solids.json" file; these all have melting points Tm >= 1500 ºC. Defaults to Ture. calculate_e_above_hulls: Whether to calculate energy above hull for each entry and store in the entry's data. Defaults to False. minimize_obj_size: Whether to minimize the size of the object by removing unnecessary attributes from the entries. Defaults to False. Returns: A GibbsEntrySet containing a collection of GibbsComputedEntry and experimental reference entry objects at the specified temperature. """gibbs_entries=[]experimental_formulas=[]forentryinpd.all_entries:composition=entry.compositionformula=composition.reduced_formulaifcomposition.is_elementandentrynotinpd.el_refs.values()orformulainexperimental_formulas:continuenew_entries=[]new_entry=Noneifinclude_nist_data:new_entry=cls._check_for_experimental(formula,"nist",temperature,ignore_nist_solids,apply_atmospheric_co2_correction)ifnew_entry:new_entries.append(new_entry)ifinclude_freed_data:new_entry=cls._check_for_experimental(formula,"freed",temperature,ignore_nist_solids,apply_atmospheric_co2_correction)ifnew_entry:new_entries.append(new_entry)ifnew_entry:experimental_formulas.append(formula)else:energy_adjustments=[]ifapply_carbonate_correction:corr=cls._get_carbonate_correction(entry)ifcorrisnotNone:energy_adjustments.append(corr)ifapply_atmospheric_co2_correctionandformula=="CO2":energy_adjustments.append(CarbonDioxideAtmosphericCorrection(entry.composition.num_atoms,temperature))structure=entry.structureformation_energy_per_atom=pd.get_form_energy_per_atom(entry)gibbs_entry=GibbsComputedEntry.from_structure(structure=structure,formation_energy_per_atom=formation_energy_per_atom,temperature=temperature,energy_adjustments=energy_adjustments,parameters=entry.parameters,data=entry.data,entry_id=entry.entry_id,)new_entries.append(gibbs_entry)gibbs_entries.extend(new_entries)returncls(gibbs_entries,calculate_e_above_hulls=calculate_e_above_hulls,minimize_obj_size=minimize_obj_size,)
[docs]defcopy(self)->GibbsEntrySet:"""Returns a copy of the entry set."""returnGibbsEntrySet(entries=self.entries)
[docs]defas_dict(self)->dict:"""Returns a JSON serializable dict representation of the entry set."""d=super().as_dict()d["entries"]=[e.as_dict()foreinself.entries]d["calculate_e_above_hulls"]=self.calculate_e_above_hullsreturnd
[docs]@classmethoddeffrom_computed_entries(cls,entries:Iterable[ComputedStructureEntry],temperature:float,include_nist_data:bool=True,include_freed_data:bool=False,apply_carbonate_correction:bool=True,apply_atmospheric_co2_correction:bool=True,ignore_nist_solids:bool=True,calculate_e_above_hulls:bool=False,minimize_obj_size:bool=False,)->GibbsEntrySet:"""Constructor method for initializing GibbsEntrySet from T = 0 K ComputedStructureEntry objects, as acquired from a thermochemical database (e.g., The Materials Project). Automatically expands the phase diagram for large chemical systems (10 or more elements) to avoid limitations of Qhull. Args: entries: Iterable of ComputedStructureEntry objects. These can be downloaded from The Materials Project API or created manually with pymatgen. temperature: Temperature [K] for determining Gibbs Free Energy of formation, dGf(T) include_nist_data: Whether to include NIST-JANAF data in the entry set. Defaults to True. include_freed_data: Whether to include FREED data in the entry set. Defaults to False. WARNING: This dataset has not been thoroughly tested. Use at your own risk! apply_carbonate_correction: Whether to apply the fit GGA energy correction for carbonates. Defaults to True. apply_atmospheric_co2_correction: Whether to modify the chemical potential of CO2 by its partial pressure in the atmosphere (). Defaults to True. ignore_nist_solids: Whether to ignore NIST data for the solids specified in the "data/nist/ignore_solids.json" file; these all have melting points Tm >= 1500 ºC. Defaults to True. calculate_e_above_hulls: Whether to calculate energy above hull for each entry and store in the entry's data. Defaults to False. minimize_obj_size: Whether to minimize the size of the object by removing unrequired attributes from the entries. Defaults to False. Returns: A GibbsEntrySet containing a collection of GibbsComputedEntry and experimental reference entry objects at the specified temperature. """e_set=EntrySet(entries)new_entries:set[GibbsComputedEntry]=set()iflen(e_set.chemsys)<=9:# Qhull algorithm struggles beyond 9 dimensionspd=PhaseDiagram(e_set)returncls.from_pd(pd,temperature,include_nist_data=include_nist_data,include_freed_data=include_freed_data,apply_carbonate_correction=apply_carbonate_correction,apply_atmospheric_co2_correction=apply_atmospheric_co2_correction,ignore_nist_solids=ignore_nist_solids,calculate_e_above_hulls=calculate_e_above_hulls,minimize_obj_size=minimize_obj_size,)pd_dict=expand_pd(list(e_set))logger.info("Building entries from expanded phase diagrams...")for_,pdintqdm(pd_dict.items(),desc="GibbsComputedEntry"):gibbs_set=cls.from_pd(pd,temperature,include_nist_data=include_nist_data,include_freed_data=include_freed_data,apply_carbonate_correction=apply_carbonate_correction,apply_atmospheric_co2_correction=apply_atmospheric_co2_correction,ignore_nist_solids=ignore_nist_solids,calculate_e_above_hulls=calculate_e_above_hulls,minimize_obj_size=minimize_obj_size,)new_entries.update(gibbs_set)returncls(list(new_entries))
@cached_propertydefentries_list(self)->list[ComputedEntry]:"""Returns a list of all entries in the entry set."""returnsorted(self.entries,key=lambdae:e.composition)@cached_propertydefmin_entries_by_formula(self)->dict[str,ComputedEntry]:"""Returns a dict of minimum energy entries in the entry set, indexed by formula. """min_entries={}foreinself.entries:formula=e.composition.reduced_formulaifformulanotinmin_entries:entries=filter(lambdax:x.composition.reduced_formula==formula,self.entries)min_entries[formula]=sorted(entries,key=lambdax:x.energy_per_atom)[0]returnmin_entries@cached_propertydeftemperature(self)->float:"""Returns the temperature of entries in the dataset. More precisely, this is the temperature encountered when iterating over the dataset. Use at your own risk if mixing GibbsComputedEntry objects calculated at different temperatures -- this poses even more theoretical problems! """temp=0.0foreinself.entries:ifisinstance(e,(ExperimentalReferenceEntry,GibbsComputedEntry)):temp=e.temperature# get temperature from any entrybreakreturntemp@cached_propertydefchemsys(self)->set[str]:"""Returns: Set of symbols representing the chemical system, e.g., {"Li", "Fe", "P", "O"}. """chemsys=set()foreinself.entries:chemsys.update([el.symbolforeline.composition])returnchemsys
[docs]@staticmethoddefget_adjusted_entry(entry:GibbsComputedEntry,adjustment:EnergyAdjustment)->GibbsComputedEntry:"""Gets an entry with an energy adjustment applied. Args: entry: A GibbsComputedEntry object. adjustment: An EnergyAdjustment object to apply to the entry. Returns: A new GibbsComputedEntry object with the energy adjustment applied. """entry_dict=entry.as_dict()original_entry=entry_dict.get("entry",None)iforiginal_entry:energy_adjustments=original_entry["energy_adjustments"]else:energy_adjustments=entry_dict["energy_adjustments"]energy_adjustments.append(adjustment.as_dict())returnMontyDecoder().process_decoded(entry_dict)
@staticmethoddef_check_for_experimental(formula:str,cls_name:str,temperature:float,ignore_nist_solids:bool,apply_atmospheric_co2_correction:bool,):cls_name=cls_name.lower()ifcls_namein("nist","nistreferenceentry"):cl=NISTReferenceEntryelifcls_namein("freed","freedreferenceentry"):cl=FREEDReferenceEntryelse:raiseValueError("Invalid class name for experimental reference entry.")entry=Noneifformulaincl.REFERENCES:ifcl==NISTReferenceEntryandignore_nist_solidsandformulainIGNORE_NIST_SOLIDS:returnNoneenergy_adjustments=Noneifapply_atmospheric_co2_correctionandformula=="CO2":energy_adjustments=[CarbonDioxideAtmosphericCorrection(3,temperature)]try:entry=cl(composition=Composition(formula),temperature=temperature,energy_adjustments=energy_adjustments)exceptValueErroraserror:logger.debug(f"Compound {formula} is in {cl} tables but at different temperatures!: {error}")returnentry@staticmethoddef_get_carbonate_correction(entry):"""Helper method for determining the carbonate correction for an entry. WARNING: The standard correction value provided in this module has been fit only to MP-derived entries (i.e., entries calculated with MPRelaxSet and MPStaticSet in VASP). Please check that the correction is valid for your entry. """comp=entry.compositionifnot{Element("C"),Element("O")}.issubset(comp.elements):returnNoneifentry.parameters.get("run_type",None)notin["GGA","GGA+U"]:returnNoneel_amts=comp.get_el_amt_dict()# now get number of carbonate ionsnum_c=el_amts.get("C",0)num_o=el_amts.get("O",0)ifnum_c==0ornum_o==0:returnNoneifnotnp.isclose(num_o/num_c,3):returnNonereturnCarbonateCorrection(num_c)def_clear_cache(self)->None:"""Clears all cached properties. This method is called whenever the entry set is modified in place (as is done with the add method, etc.). """forname,valueininspect.getmembers(GibbsEntrySet):ifisinstance(value,cached_property):try:delattr(self,name)exceptAttributeError:continue