Emmet-Core¶

The core module defines the data models for the Materials API (MAPI):

`CrystalSystem` ¶

Bases: ValueEnum

The crystal system of the lattice

Source code in emmet/core/symmetry.py

class CrystalSystem(ValueEnum):
    """
    The crystal system of the lattice

    Each member pairs a short identifier with the title-cased name of the
    crystal system. SymmetryData.from_structure looks members up by these
    title-cased values.
    """

    tri = "Triclinic"
    mono = "Monoclinic"
    ortho = "Orthorhombic"
    tet = "Tetragonal"
    trig = "Trigonal"
    # Trailing underscore presumably avoids shadowing the builtin `hex` - TODO confirm
    hex_ = "Hexagonal"
    cubic = "Cubic"

`PointGroupData` ¶

Bases: BaseModel

Defines symmetry for a molecule document

Source code in emmet/core/symmetry.py

class PointGroupData(BaseModel):
    """
    Defines symmetry for a molecule document
    """

    point_group: Optional[str] = Field(
        None, title="Point Group Symbol", description="The point group for the lattice"
    )

    rotation_number: Optional[float] = Field(
        None,
        title="Rotational Symmetry Number",
        description="Rotational symmetry number for the molecule",
    )

    linear: Optional[bool] = Field(
        None, title="Molecule Linearity", description="Is the molecule linear?"
    )

    tolerance: Optional[float] = Field(
        None,
        title="Point Group Analyzer Tolerance",
        description="Distance tolerance to consider sites as symmetrically equivalent.",
    )

    eigen_tolerance: Optional[float] = Field(
        None,
        title="Inertia Tensor Eigenvalue Tolerance",
        description="Tolerance to compare eigen values of the inertia tensor.",
    )

    matrix_tolerance: Optional[float] = Field(
        None,
        title="Symmetry Operation Matrix Element Tolerance",
        description="Tolerance used to generate the full set of symmetry operations of the point group.",
    )

    @classmethod
    def from_molecule(cls, molecule: Molecule) -> "PointGroupData":
        """
        Build the point group data for a molecule.

        The molecule is analysed with ``PointGroupAnalyzer`` using the three
        tolerances read from ``SETTINGS``; the same tolerances are stored on the
        returned document.

        Parameters
        ----------
        molecule : Molecule
            The molecule to analyse.

        Returns
        -------
        PointGroupData
            An instance of ``cls`` holding the point group symbol, the
            rotational symmetry number, the linearity flag and the tolerances.
        """
        tol = SETTINGS.PGATOL
        eigentol = SETTINGS.PGAEIGENTOL
        matrixtol = SETTINGS.PGAMATRIXTOL
        pga = PointGroupAnalyzer(
            molecule,
            tolerance=tol,
            eigen_tolerance=eigentol,
            matrix_tolerance=matrixtol,
        )
        point_group = pga.sch_symbol

        # Rotational symmetry number -> point group symbols that carry it.
        rotational_symmetry_numbers = {
            1.0: ["C1", "Cs", "Ci", "C*v", "S2"],
            2.0: ["C2", "C2h", "C2v", "S4", "D*h"],
            3.0: ["C3", "C3h", "C3v", "S6"],
            4.0: ["C4v", "D4h", "D4d", "D2", "D2h", "D2d"],
            5.0: ["C5v", "Ih"],
            6.0: ["D3", "D3h", "D3d"],
            10.0: ["D5h", "D5d"],
            12.0: ["T", "Td", "Th", "D6h"],
            14.0: ["D7h"],
            16.0: ["D8h"],
            24.0: ["Oh"],
            float("inf"): ["Kh"],
        }

        # First table row containing the symbol wins; symbols missing from the
        # table fall back to a rotational symmetry number of 1.
        rotation_number = next(
            (
                rot_num
                for rot_num, point_groups in rotational_symmetry_numbers.items()
                if point_group in point_groups
            ),
            1.0,
        )

        # Use ``cls`` rather than the concrete class name so that subclasses
        # calling this alternate constructor get an instance of themselves.
        return cls(
            tolerance=tol,
            eigen_tolerance=eigentol,
            matrix_tolerance=matrixtol,
            point_group=point_group,
            rotation_number=float(rotation_number),
            # Only these two symbols are treated as linear molecules.
            linear=point_group in ("C*v", "D*h"),
        )

`SymmetryData` ¶

Bases: BaseModel

Defines a symmetry data set for materials documents

Source code in emmet/core/symmetry.py

class SymmetryData(BaseModel):
    """
    Defines a symmetry data set for materials documents
    """

    crystal_system: Optional[CrystalSystem] = Field(
        None, title="Crystal System", description="The crystal system for this lattice."
    )

    symbol: Optional[str] = Field(
        None,
        title="Space Group Symbol",
        description="The spacegroup symbol for the lattice.",
    )

    number: Optional[int] = Field(
        None,
        title="Space Group Number",
        description="The spacegroup number for the lattice.",
    )

    point_group: Optional[str] = Field(
        None, title="Point Group Symbol", description="The point group for the lattice."
    )

    symprec: Optional[float] = Field(
        None,
        title="Symmetry Finding Precision",
        description="The precision given to spglib to determine the symmetry of this lattice.",
    )

    version: Optional[str] = Field(None, title="SPGLib version")

    @classmethod
    def from_structure(cls, structure: Structure) -> "SymmetryData":
        """
        Build the symmetry data set for a structure.

        The structure is analysed with ``SpacegroupAnalyzer`` at the precision
        ``SETTINGS.SYMPREC``. If that analysis yields no symmetry dataset, the
        analysis is repeated with the fixed arguments ``1e-3`` and ``1`` and the
        recorded ``symprec`` is set to ``1e-3``.

        Parameters
        ----------
        structure : Structure
            The structure to analyse.

        Returns
        -------
        SymmetryData
            An instance of ``cls`` populated from the analyzer results.
        """
        symprec = SETTINGS.SYMPREC
        sg = SpacegroupAnalyzer(structure, symprec=symprec)
        symmetry: Dict[str, Any] = {"symprec": symprec}
        if not sg.get_symmetry_dataset():
            # Fallback analysis; the positional arguments are presumably
            # (symprec, angle_tolerance) - TODO confirm against pymatgen.
            sg = SpacegroupAnalyzer(structure, 1e-3, 1)
            symmetry["symprec"] = 1e-3

        # NOTE(review): "source" and "hall" are not declared as fields on this
        # class; whether they are kept or dropped depends on the model's
        # extra-field configuration - confirm this is intended.
        symmetry.update(
            {
                "source": "spglib",
                "symbol": sg.get_space_group_symbol(),
                "number": sg.get_space_group_number(),
                "point_group": sg.get_point_group_symbol(),
                # Enum lookup is by value, hence the title-casing.
                "crystal_system": CrystalSystem(sg.get_crystal_system().title()),
                "hall": sg.get_hall(),
                "version": spglib.__version__,
            }
        )

        # Use ``cls`` rather than the concrete class name so that subclasses
        # calling this alternate constructor get an instance of themselves.
        return cls(**symmetry)

Core definition of Structure and Molecule metadata.

`MoleculeMetadata` ¶

Bases: EmmetBaseModel

Mix-in class for molecule metadata.

Source code in emmet/core/structure.py

class MoleculeMetadata(EmmetBaseModel):
    """Mix-in class for molecule metadata."""

    charge: Optional[int] = Field(None, description="Charge of the molecule")
    spin_multiplicity: Optional[int] = Field(
        None, description="Spin multiplicity of the molecule"
    )
    natoms: Optional[int] = Field(
        None, description="Total number of atoms in the molecule"
    )
    elements: Optional[List[Element]] = Field(
        None, description="List of elements in the molecule"
    )
    nelements: Optional[int] = Field(None, title="Number of Elements")
    nelectrons: Optional[int] = Field(
        None,
        title="Number of electrons",
        description="The total number of electrons for the molecule",
    )
    composition: Optional[Composition] = Field(
        None, description="Full composition for the molecule"
    )
    composition_reduced: Optional[Composition] = Field(
        None,
        title="Reduced Composition",
        description="Simplified representation of the composition",
    )
    formula_alphabetical: Optional[str] = Field(
        None,
        title="Alphabetical Formula",
        description="Alphabetical molecular formula",
    )
    formula_pretty: Optional[str] = Field(
        None,
        title="Pretty Formula",
        description="Cleaned representation of the formula.",
    )
    formula_anonymous: Optional[str] = Field(
        None,
        title="Anonymous Formula",
        description="Anonymized representation of the formula",
    )
    chemsys: Optional[str] = Field(
        None,
        title="Chemical System",
        description="dash-delimited string of elements in the molecule",
    )
    symmetry: Optional[PointGroupData] = Field(
        None, description="Symmetry data for this molecule"
    )

    @classmethod
    def from_composition(
        cls: Type[S],
        comp: Composition,
        fields: Optional[List[str]] = None,
        **kwargs,
    ) -> S:
        """
        Construct the metadata model from a composition alone.

        Parameters
        ----------
        comp : .Composition
            A pymatgen composition.
        fields : list of str or None
            Names of the composition-derived fields to populate. ``None``
            selects every field this method can derive.
        **kwargs
            Extra keyword arguments forwarded to the model constructor.

        Returns
        -------
        S
            An instance of ``cls``.
        """
        if fields is None:
            fields = [
                "elements",
                "nelements",
                "composition",
                "composition_reduced",
                "formula_alphabetical",
                "formula_pretty",
                "formula_anonymous",
                "chemsys",
            ]

        # Unique element symbols in alphabetical order.
        symbols = sorted({element.symbol for element in comp.elements})

        derived = {
            "elements": symbols,
            "nelements": len(symbols),
            "composition": comp,
            "composition_reduced": comp.reduced_composition,
            "formula_alphabetical": comp.alphabetical_formula,
            "formula_pretty": comp.reduced_formula,
            "formula_anonymous": comp.anonymized_formula,
            "chemsys": "-".join(symbols),
        }
        selected = {name: value for name, value in derived.items() if name in fields}

        return cls(**selected, **kwargs)

    @classmethod
    def from_molecule(
        cls: Type[S],
        meta_molecule: Molecule,
        fields: Optional[List[str]] = None,
        **kwargs,
    ) -> S:
        """
        Construct the metadata model from a molecule.

        Parameters
        ----------
        meta_molecule : Molecule
            The molecule the metadata is derived from.
        fields : list of str or None
            Names of the derived fields to populate. ``None`` selects every
            field this method can derive.
        **kwargs
            Extra keyword arguments forwarded to the model constructor.

        Returns
        -------
        S
            An instance of ``cls``.
        """
        if fields is None:
            fields = [
                "charge",
                "spin_multiplicity",
                "natoms",
                "elements",
                "nelements",
                "nelectrons",
                "composition",
                "composition_reduced",
                "formula_alphabetical",
                "formula_pretty",
                "formula_anonymous",
                "chemsys",
                "symmetry",
            ]

        comp = meta_molecule.composition
        symbols = sorted({element.symbol for element in comp.elements})
        # The point group analysis always runs, even when "symmetry" is not requested.
        point_group_data = PointGroupData.from_molecule(meta_molecule)

        derived = {
            "charge": int(meta_molecule.charge),
            "spin_multiplicity": meta_molecule.spin_multiplicity,
            "natoms": len(meta_molecule),
            "elements": symbols,
            "nelements": len(symbols),
            "nelectrons": int(meta_molecule.nelectrons),
            "composition": comp,
            "composition_reduced": comp.reduced_composition,
            "formula_alphabetical": comp.alphabetical_formula,
            "formula_pretty": comp.reduced_formula,
            "formula_anonymous": comp.anonymized_formula,
            "chemsys": "-".join(symbols),
            "symmetry": point_group_data,
        }
        selected = {name: value for name, value in derived.items() if name in fields}

        return cls(**selected, **kwargs)

`from_composition(comp, fields=None, **kwargs)` `classmethod` ¶

Create a MoleculeMetadata model from a composition.

Parameters¶

`comp` : Composition — A pymatgen composition. `fields` : list of str or None — Composition fields to include. `**kwargs` — Keyword arguments that are passed to the model constructor.

Returns¶

`S` — A molecule metadata model (an instance of the class the method is called on).

Source code in emmet/core/structure.py

@classmethod
def from_composition(
    cls: Type[S],
    comp: Composition,
    fields: Optional[List[str]] = None,
    **kwargs,
) -> S:
    """
    Construct the metadata model from a composition alone.

    Parameters
    ----------
    comp : .Composition
        A pymatgen composition.
    fields : list of str or None
        Names of the composition-derived fields to populate. ``None``
        selects every field this method can derive.
    **kwargs
        Extra keyword arguments forwarded to the model constructor.

    Returns
    -------
    S
        An instance of ``cls``.
    """
    if fields is None:
        fields = [
            "elements",
            "nelements",
            "composition",
            "composition_reduced",
            "formula_alphabetical",
            "formula_pretty",
            "formula_anonymous",
            "chemsys",
        ]

    # Unique element symbols in alphabetical order.
    symbols = sorted({element.symbol for element in comp.elements})

    derived = {
        "elements": symbols,
        "nelements": len(symbols),
        "composition": comp,
        "composition_reduced": comp.reduced_composition,
        "formula_alphabetical": comp.alphabetical_formula,
        "formula_pretty": comp.reduced_formula,
        "formula_anonymous": comp.anonymized_formula,
        "chemsys": "-".join(symbols),
    }
    selected = {name: value for name, value in derived.items() if name in fields}

    return cls(**selected, **kwargs)

`StructureMetadata` ¶

Bases: EmmetBaseModel

Mix-in class for structure metadata.

Source code in emmet/core/structure.py

class StructureMetadata(EmmetBaseModel):
    """Mix-in class for structure metadata."""

    # Structure metadata
    nsites: Optional[int] = Field(
        None, description="Total number of sites in the structure."
    )
    elements: Optional[List[Element]] = Field(
        None, description="List of elements in the material."
    )
    nelements: Optional[int] = Field(None, description="Number of elements.")
    composition: Optional[Composition] = Field(
        None, description="Full composition for the material."
    )
    composition_reduced: Optional[Composition] = Field(
        None,
        title="Reduced Composition",
        description="Simplified representation of the composition.",
    )
    formula_pretty: Optional[str] = Field(
        None,
        title="Pretty Formula",
        description="Cleaned representation of the formula.",
    )
    formula_anonymous: Optional[str] = Field(
        None,
        title="Anonymous Formula",
        description="Anonymized representation of the formula.",
    )
    chemsys: Optional[str] = Field(
        None,
        title="Chemical System",
        description="dash-delimited string of elements in the material.",
    )
    volume: Optional[float] = Field(
        None,
        title="Volume",
        description="Total volume for this structure in Angstroms^3.",
    )

    density: Optional[float] = Field(
        None, title="Density", description="Density in grams per cm^3."
    )

    density_atomic: Optional[float] = Field(
        None,
        title="Packing Density",
        description="The atomic packing density in atoms per cm^3.",
    )

    symmetry: Optional[SymmetryData] = Field(
        None, description="Symmetry data for this material."
    )

    @classmethod
    def from_composition(
        cls: Type[T],
        composition: Composition,
        fields: Optional[List[str]] = None,
        **kwargs,
    ) -> T:
        """
        Construct the metadata model from a composition alone.

        Charges are stripped from the composition before any field is derived.

        Parameters
        ----------
        composition : Composition
            The composition to derive metadata from.
        fields : list of str or None
            Names of the derived fields to populate. ``None`` selects every
            field this method can derive.
        **kwargs
            Extra keyword arguments forwarded to the model constructor.

        Returns
        -------
        T
            An instance of ``cls``.
        """
        if fields is None:
            fields = [
                "elements",
                "nelements",
                "composition",
                "composition_reduced",
                "formula_pretty",
                "formula_anonymous",
                "chemsys",
            ]

        neutral = composition.remove_charges()
        symbols = sorted({element.symbol for element in neutral.elements})

        derived = {
            "elements": symbols,
            "nelements": len(symbols),
            "composition": neutral,
            "composition_reduced": neutral.reduced_composition.remove_charges(),
            "formula_pretty": neutral.reduced_formula,
            "formula_anonymous": neutral.anonymized_formula,
            "chemsys": "-".join(symbols),
        }
        selected = {name: value for name, value in derived.items() if name in fields}

        return cls(**selected, **kwargs)

    @classmethod
    def from_structure(
        cls: Type[T],
        meta_structure: Structure,
        fields: Optional[List[str]] = None,
        **kwargs,
    ) -> T:
        """
        Construct the metadata model from a structure.

        Derived values take precedence over same-named entries in ``kwargs``.

        Parameters
        ----------
        meta_structure : Structure
            The structure to derive metadata from.
        fields : list of str or None
            Names of the derived fields to populate. ``None`` selects every
            field this method can derive.
        **kwargs
            Extra keyword arguments forwarded to the model constructor.

        Returns
        -------
        T
            An instance of ``cls``.
        """
        if fields is None:
            fields = [
                "nsites",
                "elements",
                "nelements",
                "composition",
                "composition_reduced",
                "formula_pretty",
                "formula_anonymous",
                "chemsys",
                "volume",
                "density",
                "density_atomic",
                "symmetry",
            ]

        comp = meta_structure.composition.remove_charges()
        symbols = sorted({element.symbol for element in comp.elements})
        # The symmetry analysis always runs, even when "symmetry" is not requested.
        symmetry_data = SymmetryData.from_structure(meta_structure)

        derived = {
            "nsites": meta_structure.num_sites,
            "elements": symbols,
            "nelements": len(symbols),
            "composition": comp,
            "composition_reduced": comp.reduced_composition,
            "formula_pretty": comp.reduced_formula,
            "formula_anonymous": comp.anonymized_formula,
            "chemsys": "-".join(symbols),
            "volume": meta_structure.volume,
            "density": meta_structure.density,
            # NOTE(review): this is volume per site, while the field description
            # says "atoms per cm^3" - confirm which one is intended.
            "density_atomic": meta_structure.volume / meta_structure.num_sites,
            "symmetry": symmetry_data,
        }
        selected = {name: value for name, value in derived.items() if name in fields}

        # Later entries win, so derived values override caller-supplied kwargs.
        return cls(**{**kwargs, **selected})

Core definition of a Materials Document

`CoreMoleculeDoc` ¶

Bases: MoleculeMetadata

Definition for a core Molecule Document

Source code in emmet/core/material.py

class CoreMoleculeDoc(MoleculeMetadata):
    """
    Definition for a core Molecule Document
    """

    # Only molecule_id is required for all documents
    # The two description literals are joined implicitly, so the second one
    # must start with a space to keep the sentences apart.
    molecule_id: MPculeID = Field(
        ...,
        description="The ID of this molecule, used as a universal reference across property documents."
        " This comes in the form of an MPID (or int) or MPculeID (or str)",
    )

    molecule: Molecule = Field(
        ...,
        description="The best (typically meaning lowest in energy) structure for this molecule",
    )

    # Documents are flagged as deprecated unless explicitly set otherwise.
    deprecated: bool = Field(
        True,
        description="Whether this molecule document is deprecated.",
    )

    # TODO: Why might a molecule be deprecated?
    deprecation_reasons: Optional[List[str]] = Field(
        None,
        description="List of deprecation tags detailing why this molecules document isn't valid",
    )

    initial_molecules: List[Molecule] = Field(
        [],
        description="Initial molecules used in the DFT geometry optimizations corresponding to this molecule",
    )

    task_ids: List[MPID] = Field(
        [],
        title="Calculation IDs",
        description="List of Calculations IDs used to make this Molecule Document",
    )

    # TODO: Should this be MPID?
    deprecated_tasks: List[str] = Field([], title="Deprecated Tasks")

    calc_types: Optional[Mapping[str, str]] = Field(
        None,
        description="Calculation types for all the tasks that make up this molecule",
    )

    # Both timestamps default to the moment the document is instantiated.
    last_updated: datetime = Field(
        description="Timestamp for when this document was last updated",
        default_factory=datetime.utcnow,
    )

    created_at: datetime = Field(
        description="Timestamp for when this document was first created",
        default_factory=datetime.utcnow,
    )

    origins: Optional[List[PropertyOrigin]] = Field(
        None, description="Dictionary for tracking the provenance of properties"
    )

    warnings: List[str] = Field([], description="Any warnings related to this molecule")

    @classmethod
    def from_molecule(
        cls: Type[S], molecule: Molecule, molecule_id: MPculeID, **kwargs
    ) -> S:  # type: ignore[override]
        """
        Builds a molecule document using the minimal amount of information

        The molecule is handed to the parent implementation twice: as
        ``meta_molecule`` and as the ``molecule`` field of the document.
        Remaining keyword arguments are forwarded unchanged.
        """

        return super().from_molecule(  # type: ignore
            meta_molecule=molecule, molecule_id=molecule_id, molecule=molecule, **kwargs
        )

`from_molecule(molecule, molecule_id, **kwargs)` `classmethod` ¶

Builds a molecule document using the minimal amount of information

Source code in emmet/core/material.py

@classmethod
def from_molecule(
    cls: Type[S], molecule: Molecule, molecule_id: MPculeID, **kwargs
) -> S:  # type: ignore[override]
    """
    Create a molecule document from just a molecule and its identifier.

    The molecule is forwarded to the parent implementation both as
    ``meta_molecule`` and as the ``molecule`` field; any further keyword
    arguments are passed through untouched.
    """
    parent_builder = super().from_molecule  # type: ignore
    return parent_builder(
        meta_molecule=molecule, molecule_id=molecule_id, molecule=molecule, **kwargs
    )

`MaterialsDoc` ¶

Bases: StructureMetadata

Definition for a core Materials Document

Source code in emmet/core/material.py

class MaterialsDoc(StructureMetadata):
    """
    Definition for a core Materials Document
    """

    # Only material_id is required for all documents
    # The two description literals are joined implicitly, so the second one
    # must start with a space to keep the sentences apart.
    material_id: MPID = Field(
        ...,
        description="The Materials Project ID of the material, used as a universal reference across property documents."
        " This comes in the form: mp-******.",
    )

    structure: Structure = Field(
        ...,
        description="The structure of this material.",
    )

    # Documents are flagged as deprecated unless explicitly set otherwise.
    deprecated: bool = Field(
        True,
        description="Whether this materials document is deprecated.",
    )

    deprecation_reasons: Optional[List[Union[DeprecationMessage, str]]] = Field(
        None,
        description="List of deprecation tags detailing why this materials document isn't valid.",
    )

    initial_structures: List[Structure] = Field(
        [],
        description="Initial structures used in the DFT optimizations corresponding to this material.",
    )

    task_ids: List[MPID] = Field(
        [],
        description="List of Calculations IDs used to make this Materials Document.",
    )

    deprecated_tasks: List[str] = Field([], title="Deprecated Tasks")

    calc_types: Optional[Mapping[str, str]] = Field(
        None,
        description="Calculation types for all the calculations that make up this material.",
    )

    # Both timestamps default to the moment the document is instantiated.
    last_updated: datetime = Field(
        description="Timestamp for when this document was last updated.",
        default_factory=datetime.utcnow,
    )

    created_at: datetime = Field(
        description="Timestamp for when this material document was first created.",
        default_factory=datetime.utcnow,
    )

    origins: Optional[List[PropertyOrigin]] = Field(
        None, description="Dictionary for tracking the provenance of properties."
    )

    warnings: List[str] = Field(
        [], description="Any warnings related to this material."
    )

    @classmethod
    def from_structure(
        cls: Type[T], structure: Structure, material_id: MPID, **kwargs
    ) -> T:  # type: ignore[override]
        """
        Builds a materials document using the minimal amount of information

        The structure is handed to the parent implementation twice: as
        ``meta_structure`` and as the ``structure`` field of the document.
        Remaining keyword arguments are forwarded unchanged.
        """

        return super().from_structure(  # type: ignore
            meta_structure=structure,
            material_id=material_id,
            structure=structure,
            **kwargs,
        )

`from_structure(structure, material_id, **kwargs)` `classmethod` ¶

Builds a materials document using the minimal amount of information

Source code in emmet/core/material.py

@classmethod
def from_structure(
    cls: Type[T], structure: Structure, material_id: MPID, **kwargs
) -> T:  # type: ignore[override]
    """
    Create a materials document from just a structure and its identifier.

    The structure is forwarded to the parent implementation both as
    ``meta_structure`` and as the ``structure`` field; any further keyword
    arguments are passed through untouched.
    """
    parent_builder = super().from_structure  # type: ignore
    return parent_builder(
        meta_structure=structure,
        material_id=material_id,
        structure=structure,
        **kwargs,
    )

`PropertyOrigin` ¶

Bases: BaseModel

Provenance document for the origin of properties in a material document

Source code in emmet/core/material.py

class PropertyOrigin(BaseModel):
    """
    Provenance document for the origin of properties in a material document

    Records the name of a property, the ID of the calculation it comes from,
    and when that calculation was last updated.
    """

    name: str = Field(..., description="The property name")
    # Either of the two ID types in the Union is accepted.
    task_id: Union[MPID, MPculeID] = Field(
        ..., description="The calculation ID this property comes from"
    )
    # Defaults to the time of instantiation, as returned by datetime.utcnow.
    last_updated: datetime = Field(  # type: ignore
        description="The timestamp when this calculation was last updated",
        default_factory=datetime.utcnow,
    )

    # mode="before": the validator receives the raw input value for the field.
    @field_validator("last_updated", mode="before")
    @classmethod
    def handle_datetime(cls, v):
        """Pass the raw ``last_updated`` input through ``convert_datetime``."""
        return convert_datetime(cls, v)

Core definition of a Thermo Document

`DecompositionProduct` ¶

Bases: BaseModel

Entry metadata for a decomposition process

Source code in emmet/core/thermo.py

class DecompositionProduct(BaseModel):
    """
    Entry metadata for a decomposition process

    Describes one product of a decomposition: the material it points to, its
    formula, and the amount produced. Every field is optional and defaults
    to None.
    """

    material_id: Optional[MPID] = Field(
        None,
        description="The Materials Project ID for the material this decomposition points to.",
    )
    formula: Optional[str] = Field(
        None,
        description="The formula of the decomposed material this material decomposes to.",
    )
    # NOTE(review): the description below contains a duplicated word ("this this").
    amount: Optional[float] = Field(
        None,
        description="The amount of the decomposed material by formula units this this material decomposes to.",
    )

`PhaseDiagramDoc` ¶

Bases: BaseModel

A phase diagram document

Source code in emmet/core/thermo.py

class PhaseDiagramDoc(BaseModel):
    """
    A phase diagram document

    Stores a phase diagram together with the chemical system and thermo type
    it belongs to. All fields except ``property_name`` and ``last_updated``
    are required.
    """

    # Fixed label for this document type.
    property_name: str = "phase_diagram"

    phase_diagram_id: str = Field(
        ...,
        description="Phase diagram ID consisting of the chemical system and thermo type",
    )

    chemsys: str = Field(
        ...,
        description="Dash-delimited string of elements in the material",
    )

    thermo_type: Union[ThermoType, RunType] = Field(
        ...,
        description="Functional types of calculations involved in the energy mixing scheme.",
    )

    phase_diagram: PhaseDiagram = Field(
        ...,
        description="Phase diagram for the chemical system.",
    )

    # Defaults to the time of instantiation, as returned by datetime.utcnow.
    last_updated: datetime = Field(
        description="Timestamp for the most recent calculation update for this property",
        default_factory=datetime.utcnow,
    )

`ThermoDoc` ¶

Bases: PropertyDoc

A thermo entry document

Source code in emmet/core/thermo.py

class ThermoDoc(PropertyDoc):
    """
    A thermo entry document
    """

    property_name: str = "thermo"

    thermo_type: Union[ThermoType, RunType] = Field(
        ...,
        description="Functional types of calculations involved in the energy mixing scheme.",
    )

    thermo_id: str = Field(
        ...,
        description="Unique document ID which is composed of the Material ID and thermo data type.",
    )

    uncorrected_energy_per_atom: float = Field(
        ..., description="The total DFT energy of this material per atom in eV/atom."
    )

    energy_per_atom: float = Field(
        ...,
        description="The total corrected DFT energy of this material per atom in eV/atom.",
    )

    # NOTE(review): the field name is misspelled ("uncertainy"); it is part of the
    # public schema, so it is left unchanged here.
    energy_uncertainy_per_atom: Optional[float] = Field(None, description="")

    formation_energy_per_atom: Optional[float] = Field(
        None, description="The formation energy per atom in eV/atom."
    )

    energy_above_hull: float = Field(
        ..., description="The energy above the hull in eV/Atom."
    )

    is_stable: bool = Field(
        False,
        description="Flag for whether this material is on the hull and therefore stable.",
    )

    equilibrium_reaction_energy_per_atom: Optional[float] = Field(
        None,
        description="The reaction energy of a stable entry from the neighboring equilibrium stable materials in eV."
        " Also known as the inverse distance to hull.",
    )

    decomposes_to: Optional[List[DecompositionProduct]] = Field(
        None,
        description="List of decomposition data for this material. Only valid for metastable or unstable material.",
    )

    decomposition_enthalpy: Optional[float] = Field(
        None,
        description="Decomposition enthalpy as defined by `get_decomp_and_phase_separation_energy` in pymatgen.",
    )

    decomposition_enthalpy_decomposes_to: Optional[List[DecompositionProduct]] = Field(
        None,
        description="List of decomposition data associated with the decomposition_enthalpy quantity.",
    )

    energy_type: str = Field(
        ...,
        description="The type of calculation this energy evaluation comes from.",
    )

    entry_types: List[str] = Field(
        description="List of available energy types computed for this material."
    )

    entries: Dict[str, Union[ComputedEntry, ComputedStructureEntry]] = Field(
        ...,
        description="List of all entries that are valid for this material."
        " The keys for this dictionary are names of various calculation types.",
    )

    @classmethod
    def from_entries(
        cls,
        entries: List[Union[ComputedEntry, ComputedStructureEntry]],
        thermo_type: Union[ThermoType, RunType],
        phase_diagram: Optional[PhaseDiagram] = None,
        use_max_chemsys: bool = False,
        **kwargs
    ):
        """Produce a list of thermo docs from a list of Entry objects

        Entries are grouped by ``entry.data["material_id"]``. For each group one
        "blessed" entry is chosen (best run type, then aspherical flag, then
        lowest energy) and a single document is built from it.

        Args:
            entries (List[Union[ComputedEntry, ComputedStructureEntry]]): List of Entry objects
            thermo_type (Union[ThermoType, RunType]): Thermo type
            phase_diagram (Optional[PhaseDiagram], optional): Already built phase diagram.
                Defaults to None, in which case one is constructed from ``entries``.
            use_max_chemsys (bool, optional): Whether to only produce thermo docs for materials
                that match the largest chemsys represented in the list. Defaults to False.
            **kwargs: Extra keyword arguments forwarded to ``from_structure`` for every document.

        Returns:
            List[ThermoDoc]: List of built thermo doc objects (instances of ``cls``).
        """

        pd = phase_diagram or cls.construct_phase_diagram(entries)

        chemsys = "-".join(sorted(str(e) for e in pd.elements))

        docs = []

        entries_by_mpid = defaultdict(list)
        for e in entries:
            entries_by_mpid[e.data["material_id"]].append(e)

        entry_quality_scores = {"GGA": 1, "GGA+U": 2, "SCAN": 3, "R2SCAN": 4}

        def _energy_eval(entry: ComputedStructureEntry):
            """
            Helper function to order entries for thermo energy data selection
            - Run type
            - LASPH
            - Energy
            """

            return (
                -1 * entry_quality_scores.get(entry.data["run_type"], 0),
                -1 * int(entry.data.get("aspherical", False)),
                entry.energy,
            )

        def _decomposition_products(decomposition):
            """Convert an {entry: amount} mapping into DecompositionProduct-style dicts."""
            return [
                {
                    "material_id": de.data["material_id"],
                    "formula": de.composition.formula,
                    "amount": amt,
                }
                for de, amt in decomposition.items()
            ]

        for material_id, entry_group in entries_by_mpid.items():
            if (
                use_max_chemsys
                and entry_group[0].composition.chemical_system != chemsys
            ):
                continue

            # min() returns the first entry with the smallest key, which is the
            # same entry a stable sort would put in position 0.
            blessed_entry = min(entry_group, key=_energy_eval)

            (decomp, ehull) = pd.get_decomp_and_e_above_hull(blessed_entry)

            builder_meta = EmmetMeta(license=blessed_entry.data.get("license"))

            d = {
                "thermo_id": "{}_{}".format(material_id, str(thermo_type)),
                "material_id": material_id,
                "thermo_type": thermo_type,
                "uncorrected_energy_per_atom": blessed_entry.uncorrected_energy
                / blessed_entry.composition.num_atoms,
                "energy_per_atom": blessed_entry.energy
                / blessed_entry.composition.num_atoms,
                "formation_energy_per_atom": pd.get_form_energy_per_atom(blessed_entry),
                "energy_above_hull": ehull,
                "is_stable": blessed_entry in pd.stable_entries,
                "builder_meta": builder_meta.model_dump(),
            }

            # Uncomment to make last_updated line up with materials.
            # if "last_updated" in blessed_entry.data:
            #     d["last_updated"] = blessed_entry.data["last_updated"]

            # Store different info if stable vs decomposes
            if d["is_stable"]:
                d["equilibrium_reaction_energy_per_atom"] = (
                    pd.get_equilibrium_reaction_energy(blessed_entry)
                )
            else:
                d["decomposes_to"] = _decomposition_products(decomp)

            try:
                decomp, energy = pd.get_decomp_and_phase_separation_energy(
                    blessed_entry
                )
                d["decomposition_enthalpy"] = energy
                d["decomposition_enthalpy_decomposes_to"] = _decomposition_products(
                    decomp
                )
            except ValueError:
                # try/except so this quantity does not take down the builder if it fails:
                # it includes an optimization step that can be fragile in some instances,
                # most likely failure is ValueError, "invalid value encountered in true_divide"
                d["warnings"] = [
                    "Could not calculate decomposition enthalpy for this entry."
                ]

            d["energy_type"] = blessed_entry.parameters.get("run_type", "Unknown")
            d["entry_types"] = []
            d["entries"] = {}

            # Currently, each entry group contains a single entry due to how the compatibility scheme works
            for entry in entry_group:
                run_type = entry.parameters.get("run_type", "Unknown")
                d["entry_types"].append(run_type)
                d["entries"][run_type] = entry

            # "last_updated" is only present in d if the block above is
            # uncommented; otherwise the current time is used.
            d["origins"] = [
                PropertyOrigin(
                    name="energy",
                    task_id=blessed_entry.data["task_id"],
                    last_updated=d.get("last_updated", datetime.utcnow()),
                )
            ]

            # Build through ``cls`` (not the concrete class name) so subclasses
            # calling from_entries receive instances of themselves, matching the
            # ``cls.construct_phase_diagram`` call above.
            docs.append(
                cls.from_structure(
                    meta_structure=blessed_entry.structure, **d, **kwargs
                )
            )

        return docs

    @staticmethod
    def construct_phase_diagram(entries) -> PhaseDiagram:
        """
        Efficiently construct a phase diagram using only the lowest entries at every composition
        represented in the entry data passed.

        Args:
            entries (List[ComputedStructureEntry]): List of corrected pymatgen entry objects.

        Returns:
            PhaseDiagram: Pymatgen PhaseDiagram object
        """
        entries_by_comp = defaultdict(list)
        for e in entries:
            entries_by_comp[e.composition.reduced_formula].append(e)

        # Only use lowest entry per composition to speed up QHull in Phase Diagram
        reduced_entries = [
            min(comp_entries, key=lambda e: e.energy_per_atom)
            for comp_entries in entries_by_comp.values()
        ]
        pd = PhaseDiagram(reduced_entries)

        # Add back all entries, not just those on the hull
        pd_computed_data = pd.computed_data
        pd_computed_data["all_entries"] = entries
        new_pd = PhaseDiagram(
            entries, elements=pd.elements, computed_data=pd_computed_data
        )
        return new_pd

`construct_phase_diagram(entries)` `staticmethod` ¶

Efficiently construct a phase diagram using only the lowest-energy entry at every composition represented in the entry data passed.

Parameters:

Name	Type	Description	Default
`entries`	`List[ComputedStructureEntry]`	List of corrected pymatgen entry objects.	required

Returns:

Name	Type	Description
`PhaseDiagram`	`PhaseDiagram`	Pymatgen PhaseDiagram object

Source code in emmet/core/thermo.py

@staticmethod
def construct_phase_diagram(entries) -> PhaseDiagram:
    """
    Efficiently construct a phase diagram using only the lowest entries at every composition
    represented in the entry data passed.

    The hull is first built from one entry per reduced formula (the one with the
    lowest ``energy_per_atom``); a second PhaseDiagram is then created from the
    full entry list, reusing the computed data of the first one.

    Args:
        entries (List[ComputedStructureEntry]): List of corrected pymatgen entry objects.

    Returns:
        PhaseDiagram: Pymatgen PhaseDiagram object
    """
    # Bucket the entries by reduced formula so each composition can be reduced
    # to a single representative.
    entries_by_comp = defaultdict(list)
    for e in entries:
        entries_by_comp[e.composition.reduced_formula].append(e)

    # Only use lowest entry per composition to speed up QHull in Phase Diagram.
    # ``min`` finds it in a single pass (no full sort needed) and, like a stable
    # sort, keeps the first entry encountered when energies tie.
    reduced_entries = [
        min(comp_entries, key=lambda e: e.energy_per_atom)
        for comp_entries in entries_by_comp.values()
    ]
    pd = PhaseDiagram(reduced_entries)

    # Add back all entries, not just those on the hull
    pd_computed_data = pd.computed_data
    pd_computed_data["all_entries"] = entries
    return PhaseDiagram(
        entries, elements=pd.elements, computed_data=pd_computed_data
    )

`from_entries(entries, thermo_type, phase_diagram=None, use_max_chemsys=False, **kwargs)` `classmethod` ¶

Produce a list of ThermoDocs from a list of Entry objects

Parameters:

Name	Type	Description	Default
`entries`	`List[Union[ComputedEntry, ComputedStructureEntry]]`	List of Entry objects	required
`thermo_type`	`Union[ThermoType, RunType]`	Thermo type	required
`phase_diagram`	`Optional[PhaseDiagram]`	Already built phase diagram. Defaults to None.	`None`
`use_max_chemsys`	`bool`	Whether to only produce thermo docs for materials that match the largest chemsys represented in the list. Defaults to False.	`False`

Returns:

Type	Description
`List[ThermoDoc]`	List of built thermo doc objects.

Source code in emmet/core/thermo.py

@classmethod
def from_entries(
    cls,
    entries: List[Union[ComputedEntry, ComputedStructureEntry]],
    thermo_type: Union[ThermoType, RunType],
    phase_diagram: Optional[PhaseDiagram] = None,
    use_max_chemsys: bool = False,
    **kwargs
):
    """Build ThermoDocs, one per material ID, from a list of Entry objects.

    Entries are grouped by ``entry.data["material_id"]``. Within each group a
    single entry is selected (best run type first, then entries flagged
    ``aspherical``, then lowest energy) and its hull-related quantities are
    read from the phase diagram.

    Args:
        entries (List[Union[ComputedEntry, ComputedStructureEntry]]): List of Entry objects
        thermo_type (Union[ThermoType, RunType]): Thermo type
        phase_diagram (Optional[PhaseDiagram], optional): Already built phase diagram.
            When omitted, one is constructed from ``entries``. Defaults to None.
        use_max_chemsys (bool, optional): Whether to only produce thermo docs for materials
            that match the largest chemsys represented in the list. Defaults to False.
        **kwargs: Forwarded unchanged to ``ThermoDoc.from_structure``.

    Returns:
        List[ThermoDoc]: List of built thermo doc objects.
    """

    pd = phase_diagram or cls.construct_phase_diagram(entries)

    # Chemical system of the phase diagram, e.g. "Fe-O": sorted element symbols joined by "-".
    chemsys = "-".join(sorted(str(element) for element in pd.elements))

    # material_id -> every entry carrying that ID, in input order.
    grouped = defaultdict(list)
    for candidate in entries:
        grouped[candidate.data["material_id"]].append(candidate)

    run_type_rank = {"GGA": 1, "GGA+U": 2, "SCAN": 3, "R2SCAN": 4}

    def _selection_key(entry: ComputedStructureEntry):
        """
        Ordering key used to pick the entry that supplies the thermo energy data.
        Smaller keys win, so rank and the aspherical flag are negated:
        - Run type (higher rank preferred, unknown run types rank 0)
        - LASPH (entries flagged "aspherical" preferred)
        - Energy (lower preferred)
        """
        rank = run_type_rank.get(entry.data["run_type"], 0)
        aspherical = int(entry.data.get("aspherical", False))
        return (-rank, -aspherical, entry.energy)

    def _decomposition_records(products):
        """Turn an {entry: amount} mapping into a list of plain dict records."""
        return [
            {
                "material_id": phase.data["material_id"],
                "formula": phase.composition.formula,
                "amount": amount,
            }
            for phase, amount in products.items()
        ]

    docs = []

    for material_id, entry_group in grouped.items():
        # Optionally restrict output to materials spanning the full chemical system.
        if use_max_chemsys and entry_group[0].composition.chemical_system != chemsys:
            continue

        # First entry with the smallest key, i.e. the same pick as a stable sort's head.
        blessed_entry = min(entry_group, key=_selection_key)

        hull_decomp, ehull = pd.get_decomp_and_e_above_hull(blessed_entry)

        builder_meta = EmmetMeta(license=blessed_entry.data.get("license"))

        n_atoms = blessed_entry.composition.num_atoms

        d = {
            "thermo_id": f"{material_id}_{thermo_type!s}",
            "material_id": material_id,
            "thermo_type": thermo_type,
            "uncorrected_energy_per_atom": blessed_entry.uncorrected_energy / n_atoms,
            "energy_per_atom": blessed_entry.energy / n_atoms,
            "formation_energy_per_atom": pd.get_form_energy_per_atom(blessed_entry),
            "energy_above_hull": ehull,
            "is_stable": blessed_entry in pd.stable_entries,
            "builder_meta": builder_meta.model_dump(),
        }

        # Uncomment to make last_updated line up with materials.
        # if "last_updated" in blessed_entry.data:
        #     d["last_updated"] = blessed_entry.data["last_updated"]

        # Stable entries record their equilibrium reaction energy; unstable ones
        # record the hull phases they decompose into.
        if d["is_stable"]:
            d["equilibrium_reaction_energy_per_atom"] = (
                pd.get_equilibrium_reaction_energy(blessed_entry)
            )
        else:
            d["decomposes_to"] = _decomposition_records(hull_decomp)

        try:
            separation_decomp, separation_energy = (
                pd.get_decomp_and_phase_separation_energy(blessed_entry)
            )
            d["decomposition_enthalpy"] = separation_energy
            d["decomposition_enthalpy_decomposes_to"] = _decomposition_records(
                separation_decomp
            )
        except ValueError:
            # try/except so this quantity does not take down the builder if it fails:
            # it includes an optimization step that can be fragile in some instances,
            # most likely failure is ValueError, "invalid value encountered in true_divide"
            d["warnings"] = [
                "Could not calculate decomposition enthalpy for this entry."
            ]

        d["energy_type"] = blessed_entry.parameters.get("run_type", "Unknown")

        # Currently, each entry group contains a single entry due to how the compatibility scheme works
        # If several entries share a run type, the last one wins in "entries".
        run_types = [
            member.parameters.get("run_type", "Unknown") for member in entry_group
        ]
        d["entry_types"] = run_types
        d["entries"] = dict(zip(run_types, entry_group))

        # No "last_updated" key is set on d above, so the origin is stamped
        # with the current UTC time.
        d["origins"] = [
            PropertyOrigin(
                name="energy",
                task_id=blessed_entry.data["task_id"],
                last_updated=d.get("last_updated", datetime.utcnow()),
            )
        ]

        thermo_doc = ThermoDoc.from_structure(
            meta_structure=blessed_entry.structure, **d, **kwargs
        )
        docs.append(thermo_doc)

    return docs

Core definition of a Provenance Document