Skip to content

Emmet-Core

The core module defines the data models for the Materials API (MAPI):

CrystalSystem

Bases: ValueEnum

The crystal system of the lattice

Source code in emmet/core/symmetry.py
19
20
21
22
23
24
25
26
27
28
29
30
class CrystalSystem(ValueEnum):
    """
    The crystal system of the lattice

    Each member maps a short identifier to the capitalized name of a
    crystal system; the capitalized name is the enum value.
    """

    tri = "Triclinic"
    mono = "Monoclinic"
    ortho = "Orthorhombic"
    tet = "Tetragonal"
    trig = "Trigonal"
    # NOTE(review): trailing underscore presumably avoids shadowing the
    # builtin ``hex`` -- confirm before renaming.
    hex_ = "Hexagonal"
    cubic = "Cubic"

PointGroupData

Bases: BaseModel

Defines symmetry for a molecule document

Source code in emmet/core/symmetry.py
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
class PointGroupData(BaseModel):
    """
    Defines symmetry for a molecule document

    Holds the point group symbol reported by ``PointGroupAnalyzer``, the
    rotational symmetry number looked up from that symbol, a linearity flag,
    and the three tolerances that were passed to the analyzer.
    """

    point_group: Optional[str] = Field(
        None, title="Point Group Symbol", description="The point group for the lattice"
    )

    rotation_number: Optional[float] = Field(
        None,
        title="Rotational Symmetry Number",
        description="Rotational symmetry number for the molecule",
    )

    linear: Optional[bool] = Field(
        None, title="Molecule Linearity", description="Is the molecule linear?"
    )

    tolerance: Optional[float] = Field(
        None,
        title="Point Group Analyzer Tolerance",
        description="Distance tolerance to consider sites as symmetrically equivalent.",
    )

    eigen_tolerance: Optional[float] = Field(
        None,
        title="Inertia Tensor Eigenvalue Tolerance",
        description="Tolerance to compare eigen values of the inertia tensor.",
    )

    matrix_tolerance: Optional[float] = Field(
        None,
        title="Symmetry Operation Matrix Element Tolerance",
        description="Tolerance used to generate the full set of symmetry operations of the point group.",
    )

    @classmethod
    def from_molecule(cls, molecule: Molecule) -> "PointGroupData":
        """
        Build point group data for a molecule.

        Parameters
        ----------
        molecule : Molecule
            The molecule handed to ``PointGroupAnalyzer``.

        Returns
        -------
        PointGroupData
            An instance of the calling class populated with the point group
            symbol, the rotational symmetry number (1.0 when the symbol is
            not in the lookup table), the linearity flag and the tolerances
            read from ``SETTINGS``.
        """
        tol = SETTINGS.PGATOL
        eigentol = SETTINGS.PGAEIGENTOL
        matrixtol = SETTINGS.PGAMATRIXTOL
        pga = PointGroupAnalyzer(
            molecule,
            tolerance=tol,
            eigen_tolerance=eigentol,
            matrix_tolerance=matrixtol,
        )
        point_group = pga.sch_symbol

        # Rotational symmetry number -> point group symbols carrying it.
        # NOTE(review): commonly tabulated symmetry numbers are 2n for
        # Dn/Dnh/Dnd (i.e. 8 for D4h and D4d) and 60 for Ih, which differs
        # from the entries below. The table is kept as-is so stored values do
        # not change -- confirm against a reference before editing.
        rotational_symmetry_numbers = {
            1.0: ["C1", "Cs", "Ci", "C*v", "S2"],
            2.0: ["C2", "C2h", "C2v", "S4", "D*h"],
            3.0: ["C3", "C3h", "C3v", "S6"],
            4.0: ["C4v", "D4h", "D4d", "D2", "D2h", "D2d"],
            5.0: ["C5v", "Ih"],
            6.0: ["D3", "D3h", "D3d"],
            10.0: ["D5h", "D5d"],
            12.0: ["T", "Td", "Th", "D6h"],
            14.0: ["D7h"],
            16.0: ["D8h"],
            24.0: ["Oh"],
            float("inf"): ["Kh"],
        }

        # First table entry containing the symbol wins; unknown symbols
        # fall back to a symmetry number of 1.0.
        rotation_number = next(
            (
                rot_num
                for rot_num, point_groups in rotational_symmetry_numbers.items()
                if point_group in point_groups
            ),
            1.0,
        )

        # Use ``cls`` so subclasses get an instance of their own type.
        return cls(
            tolerance=tol,
            eigen_tolerance=eigentol,
            matrix_tolerance=matrixtol,
            point_group=point_group,
            rotation_number=float(rotation_number),
            # Only the two linear point groups count as linear molecules.
            linear=point_group in ("C*v", "D*h"),
        )

SymmetryData

Bases: BaseModel

Defines a symmetry data set for materials documents

Source code in emmet/core/symmetry.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
class SymmetryData(BaseModel):
    """
    Defines a symmetry data set for materials documents

    Records the space group symbol and number, point group, crystal system,
    the tolerances handed to ``SpacegroupAnalyzer`` and the spglib version.
    """

    crystal_system: Optional[CrystalSystem] = Field(
        None, title="Crystal System", description="The crystal system for this lattice."
    )

    symbol: Optional[str] = Field(
        None,
        title="Space Group Symbol",
        description="The spacegroup symbol for the lattice.",
    )

    number: Optional[int] = Field(
        None,
        title="Space Group Number",
        description="The spacegroup number for the lattice.",
    )

    point_group: Optional[str] = Field(
        None, title="Point Group Symbol", description="The point group for the lattice."
    )

    symprec: Optional[float] = Field(
        None,
        title="Symmetry Finding Precision",
        description="The precision provided to spglib to determine the symmetry of this structure.",
    )

    angle_tolerance: Optional[float] = Field(
        None,
        title="Angle Tolerance",
        description="Angle tolerance provided to spglib to determine the symmetry of this structure.",
    )

    version: Optional[str] = Field(None, title="spglib version")

    @classmethod
    def from_structure(cls, structure: Structure) -> "SymmetryData":
        """
        Build symmetry data for a structure.

        The analysis is first attempted with the tolerances from ``SETTINGS``.
        If that raises ``SymmetryUndeterminedError`` it is retried once with
        ``symprec=1e-3`` and ``angle_tolerance=1``. If the retry fails too,
        the returned document carries the retry tolerances and the spglib
        version, with every symmetry field left as ``None``.

        Parameters
        ----------
        structure : Structure
            The structure handed to ``SpacegroupAnalyzer``.

        Returns
        -------
        SymmetryData
            An instance of the calling class.
        """
        # NOTE(review): "source" and "hall" are not declared fields on this
        # model; whether they are kept or dropped depends on the model's
        # extra-field configuration, which is not visible here.
        symmetry: Dict[str, Any] = {
            "source": "spglib",
            "symbol": None,
            "number": None,
            "point_group": None,
            "crystal_system": None,
            "hall": None,
            "version": spglib.__version__,
            "symprec": SETTINGS.SYMPREC,
            "angle_tolerance": SETTINGS.ANGLE_TOL,
        }

        # Configured tolerances first, then one fixed fallback pair.
        attempts = (
            (symmetry["symprec"], symmetry["angle_tolerance"]),
            (1e-3, 1),
        )

        sg = None
        for symprec, angle_tolerance in attempts:
            # Record the tolerances of the attempt actually being made so the
            # document reflects the last values tried.
            symmetry["symprec"] = symprec
            symmetry["angle_tolerance"] = angle_tolerance
            try:
                sg = SpacegroupAnalyzer(
                    structure,
                    symprec=symprec,
                    angle_tolerance=angle_tolerance,
                )
            except SymmetryUndeterminedError:
                continue
            break

        if sg is None:
            # Both attempts failed: return the document without symmetry info.
            return cls(**symmetry)

        symmetry.update(
            {
                "symbol": sg.get_space_group_symbol(),
                "number": sg.get_space_group_number(),
                "point_group": sg.get_point_group_symbol(),
                # ``.title()`` capitalizes the name so it matches the
                # CrystalSystem enum values.
                "crystal_system": CrystalSystem(sg.get_crystal_system().title()),
                "hall": sg.get_hall(),
            }
        )

        # Use ``cls`` so subclasses get an instance of their own type.
        return cls(**symmetry)

Core definition of Structure and Molecule metadata.

MoleculeMetadata

Bases: EmmetBaseModel

Mix-in class for molecule metadata.

Source code in emmet/core/structure.py
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
class MoleculeMetadata(EmmetBaseModel):
    """Mix-in class for molecule metadata."""

    charge: Optional[int] = Field(
        None,
        description="Charge of the molecule",
    )
    spin_multiplicity: Optional[int] = Field(
        None,
        description="Spin multiplicity of the molecule",
    )
    natoms: Optional[int] = Field(
        None,
        description="Total number of atoms in the molecule",
    )
    elements: Optional[List[Element]] = Field(
        None,
        description="List of elements in the molecule",
    )
    nelements: Optional[int] = Field(
        None,
        title="Number of Elements",
    )
    nelectrons: Optional[int] = Field(
        None,
        title="Number of electrons",
        description="The total number of electrons for the molecule",
    )
    composition: Optional[Composition] = Field(
        None,
        description="Full composition for the molecule",
    )
    composition_reduced: Optional[Composition] = Field(
        None,
        title="Reduced Composition",
        description="Simplified representation of the composition",
    )
    formula_alphabetical: Optional[str] = Field(
        None,
        title="Alphabetical Formula",
        description="Alphabetical molecular formula",
    )
    formula_pretty: Optional[str] = Field(
        None,
        title="Pretty Formula",
        description="Cleaned representation of the formula.",
    )
    formula_anonymous: Optional[str] = Field(
        None,
        title="Anonymous Formula",
        description="Anonymized representation of the formula",
    )
    chemsys: Optional[str] = Field(
        None,
        title="Chemical System",
        description="dash-delimited string of elements in the molecule",
    )
    symmetry: Optional[PointGroupData] = Field(
        None,
        description="Symmetry data for this molecule",
    )
    species_hash: Optional[str] = Field(
        None,
        description="Weisfeiler Lehman (WL) graph hash using the atom species "
        "as the graph node attribute.",
    )
    coord_hash: Optional[str] = Field(
        None,
        description="Weisfeiler Lehman (WL) graph hash using the atom coordinates "
        "as the graph node attribute.",
    )

    @classmethod
    def from_composition(
        cls: Type[S],
        comp: Composition,
        fields: Optional[List[str]] = None,
        **kwargs,
    ) -> S:
        """
        Create a molecule metadata model from a composition.

        Parameters
        ----------
        comp : Composition
            Composition the metadata values are derived from.
        fields : list of str or None
            Names of the derived values to pass to the constructor. ``None``
            selects every composition-derived value.
        **kwargs
            Extra keyword arguments forwarded to the model constructor.

        Returns
        -------
        S
            An instance of the calling class.
        """
        if fields is None:
            fields = [
                "elements",
                "nelements",
                "composition",
                "composition_reduced",
                "formula_alphabetical",
                "formula_pretty",
                "formula_anonymous",
                "chemsys",
            ]

        # Unique element symbols in alphabetical order.
        symbols = sorted({el.symbol for el in comp.elements})

        derived = {
            "elements": symbols,
            "nelements": len(symbols),
            "composition": comp,
            "composition_reduced": comp.reduced_composition,
            "formula_alphabetical": comp.alphabetical_formula,
            "formula_pretty": comp.reduced_formula,
            "formula_anonymous": comp.anonymized_formula,
            "chemsys": "-".join(symbols),
        }

        # Only the requested values reach the constructor.
        selected = {
            name: value for name, value in derived.items() if name in fields
        }
        return cls(**selected, **kwargs)

    @classmethod
    def from_molecule(
        cls: Type[S],
        meta_molecule: Molecule,
        fields: Optional[List[str]] = None,
        **kwargs,
    ) -> S:
        """
        Create a molecule metadata model from a molecule.

        Parameters
        ----------
        meta_molecule : Molecule
            Molecule the metadata values are derived from.
        fields : list of str or None
            Names of the derived values to pass to the constructor. ``None``
            selects every value this method can derive.
        **kwargs
            Extra keyword arguments forwarded to the model constructor.

        Returns
        -------
        S
            An instance of the calling class.
        """
        if fields is None:
            fields = [
                "charge",
                "spin_multiplicity",
                "natoms",
                "elements",
                "nelements",
                "nelectrons",
                "composition",
                "composition_reduced",
                "formula_alphabetical",
                "formula_pretty",
                "formula_anonymous",
                "chemsys",
                "symmetry",
                "species_hash",
                "coord_hash",
            ]

        comp = meta_molecule.composition
        symbols = sorted({el.symbol for el in comp.elements})
        # Point group data is computed regardless of the ``fields`` selection.
        point_group_data = PointGroupData.from_molecule(meta_molecule)

        derived = {
            "charge": int(meta_molecule.charge),
            "spin_multiplicity": meta_molecule.spin_multiplicity,
            "natoms": len(meta_molecule),
            "elements": symbols,
            "nelements": len(symbols),
            "nelectrons": int(meta_molecule.nelectrons),
            "composition": comp,
            "composition_reduced": comp.reduced_composition,
            "formula_alphabetical": comp.alphabetical_formula,
            "formula_pretty": comp.reduced_formula,
            "formula_anonymous": comp.anonymized_formula,
            "chemsys": "-".join(symbols),
            "symmetry": point_group_data,
        }

        # Graph hashes are only added when ``openbabel`` is truthy.
        if openbabel:
            for key, node_attr in (
                ("species_hash", "specie"),
                ("coord_hash", "coords"),
            ):
                derived[key] = get_graph_hash(meta_molecule, node_attr)

        selected = {
            name: value for name, value in derived.items() if name in fields
        }
        return cls(**selected, **kwargs)

from_composition(comp, fields=None, **kwargs) classmethod

Create a MoleculeMetadata model from a composition.

Parameters

- comp (Composition): A pymatgen composition.
- fields (list of str or None): Composition fields to include.
- **kwargs: Keyword arguments that are passed to the model constructor.

Returns

S: A molecule metadata model (an instance of the class the method is called on).

Source code in emmet/core/structure.py
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
@classmethod
def from_composition(
    cls: Type[S],
    comp: Composition,
    fields: Optional[List[str]] = None,
    **kwargs,
) -> S:
    """
    Create a molecule metadata model from a composition.

    Parameters
    ----------
    comp : Composition
        Composition the metadata values are derived from.
    fields : list of str or None
        Names of the derived values to pass to the constructor. ``None``
        selects every composition-derived value.
    **kwargs
        Extra keyword arguments forwarded to the model constructor.

    Returns
    -------
    S
        An instance of the calling class.
    """
    # Unique element symbols in alphabetical order.
    symbols = sorted({el.symbol for el in comp.elements})

    derived = {
        "elements": symbols,
        "nelements": len(symbols),
        "composition": comp,
        "composition_reduced": comp.reduced_composition,
        "formula_alphabetical": comp.alphabetical_formula,
        "formula_pretty": comp.reduced_formula,
        "formula_anonymous": comp.anonymized_formula,
        "chemsys": "-".join(symbols),
    }

    # With no explicit selection, every derived value is included.
    if fields is None:
        fields = list(derived)

    selected = {name: value for name, value in derived.items() if name in fields}
    return cls(**selected, **kwargs)

StructureMetadata

Bases: EmmetBaseModel

Mix-in class for structure metadata.

Source code in emmet/core/structure.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
class StructureMetadata(EmmetBaseModel):
    """Mix-in class for structure metadata."""

    # Structure metadata
    nsites: Optional[int] = Field(
        None, description="Total number of sites in the structure."
    )
    elements: Optional[List[Element]] = Field(
        None, description="List of elements in the material."
    )
    nelements: Optional[int] = Field(None, description="Number of elements.")
    composition: Optional[Composition] = Field(
        None, description="Full composition for the material."
    )
    composition_reduced: Optional[Composition] = Field(
        None,
        title="Reduced Composition",
        description="Simplified representation of the composition.",
    )
    formula_pretty: Optional[str] = Field(
        None,
        title="Pretty Formula",
        description="Cleaned representation of the formula.",
    )
    formula_anonymous: Optional[str] = Field(
        None,
        title="Anonymous Formula",
        description="Anonymized representation of the formula.",
    )
    chemsys: Optional[str] = Field(
        None,
        title="Chemical System",
        description="dash-delimited string of elements in the material.",
    )
    volume: Optional[float] = Field(
        None,
        title="Volume",
        description="Total volume for this structure in Angstroms^3.",
    )

    density: Optional[float] = Field(
        None, title="Density", description="Density in grams per cm^3."
    )

    # The value stored here is ``volume / num_sites`` (see ``from_structure``),
    # i.e. a volume per site, so the description states that rather than a
    # number density. The title is left unchanged for schema stability.
    density_atomic: Optional[float] = Field(
        None,
        title="Packing Density",
        description="The volume per site in Angstroms^3 "
        "(structure volume divided by the number of sites).",
    )

    symmetry: Optional[SymmetryData] = Field(
        None, description="Symmetry data for this material."
    )

    @classmethod
    def from_composition(
        cls: Type[T],
        composition: Composition,
        fields: Optional[List[str]] = None,
        **kwargs,
    ) -> T:
        """
        Create a structure metadata model from a composition.

        Parameters
        ----------
        composition : Composition
            Composition the metadata is derived from; charges are removed
            before any value is computed.
        fields : list of str or None
            Names of the derived values to pass to the constructor. ``None``
            selects every composition-derived value.
        **kwargs
            Extra keyword arguments forwarded to the model constructor. A key
            that also appears among the selected derived values raises
            ``TypeError`` (duplicate keyword argument).

        Returns
        -------
        T
            An instance of the calling class.
        """
        fields = (
            [
                "elements",
                "nelements",
                "composition",
                "composition_reduced",
                "formula_pretty",
                "formula_anonymous",
                "chemsys",
            ]
            if fields is None
            else fields
        )
        composition = composition.remove_charges()

        # Unique element symbols in alphabetical order.
        elsyms = sorted({e.symbol for e in composition.elements})

        data = {
            "elements": elsyms,
            "nelements": len(elsyms),
            "composition": composition,
            "composition_reduced": composition.reduced_composition.remove_charges(),
            "formula_pretty": composition.reduced_formula,
            "formula_anonymous": composition.anonymized_formula,
            "chemsys": "-".join(elsyms),
        }

        return cls(**{k: v for k, v in data.items() if k in fields}, **kwargs)

    @classmethod
    def from_structure(
        cls: Type[T],
        meta_structure: Structure,
        fields: Optional[List[str]] = None,
        **kwargs,
    ) -> T:
        """
        Create a structure metadata model from a structure.

        Parameters
        ----------
        meta_structure : Structure
            Structure the metadata is derived from.
        fields : list of str or None
            Names of the derived values to pass to the constructor. ``None``
            selects every value this method can derive.
        **kwargs
            Extra keyword arguments forwarded to the model constructor.
            Selected derived values overwrite same-named entries here.

        Returns
        -------
        T
            An instance of the calling class.
        """
        fields = (
            [
                "nsites",
                "elements",
                "nelements",
                "composition",
                "composition_reduced",
                "formula_pretty",
                "formula_anonymous",
                "chemsys",
                "volume",
                "density",
                "density_atomic",
                "symmetry",
            ]
            if fields is None
            else fields
        )
        comp = meta_structure.composition.remove_charges()
        elsyms = sorted({e.symbol for e in comp.elements})
        # Symmetry is computed regardless of the ``fields`` selection.
        symmetry = SymmetryData.from_structure(meta_structure)

        data = {
            "nsites": meta_structure.num_sites,
            "elements": elsyms,
            "nelements": len(elsyms),
            "composition": comp,
            "composition_reduced": comp.reduced_composition,
            "formula_pretty": comp.reduced_formula,
            "formula_anonymous": comp.anonymized_formula,
            "chemsys": "-".join(elsyms),
            "volume": meta_structure.volume,
            "density": meta_structure.density,
            # Volume per site; raises ZeroDivisionError when num_sites is 0.
            "density_atomic": meta_structure.volume / meta_structure.num_sites,
            "symmetry": symmetry,
        }
        kwargs.update({k: v for k, v in data.items() if k in fields})
        return cls(**kwargs)

Core definition of a Materials Document

CoreMoleculeDoc

Bases: MoleculeMetadata

Definition for a core Molecule Document

Source code in emmet/core/material.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
class CoreMoleculeDoc(MoleculeMetadata):
    """
    Definition for a core Molecule Document

    Extends the molecule metadata with the molecule itself, its ID,
    deprecation state, contributing tasks, timestamps, provenance and
    warnings.
    """

    # Only molecule_id is required for all documents
    molecule_id: MPculeID = Field(
        ...,
        description="The ID of this molecule, used as a universal reference across property documents. "
        "This comes in the form of an MPID (or int) or MPculeID (or str)",
    )

    molecule: Molecule = Field(
        ...,
        description="The best (typically meaning lowest in energy) structure for this molecule",
    )

    # Documents are deprecated unless explicitly marked otherwise.
    deprecated: bool = Field(
        True,
        description="Whether this molecule document is deprecated.",
    )

    # TODO: Why might a molecule be deprecated?
    deprecation_reasons: Optional[List[str]] = Field(
        None,
        description="List of deprecation tags detailing why this molecule document isn't valid",
    )

    initial_molecules: List[Molecule] = Field(
        [],
        description="Initial molecules used in the DFT geometry optimizations corresponding to this molecule",
    )

    task_ids: List[MPID] = Field(
        [],
        title="Calculation IDs",
        description="List of Calculations IDs used to make this Molecule Document",
    )

    # TODO: Should this be MPID?
    deprecated_tasks: List[str] = Field([], title="Deprecated Tasks")

    calc_types: Optional[Mapping[str, str]] = Field(
        None,
        description="Calculation types for all the tasks that make up this molecule",
    )

    last_updated: datetime = Field(
        description="Timestamp for when this document was last updated",
        default_factory=datetime.utcnow,
    )

    created_at: datetime = Field(
        description="Timestamp for when this document was first created",
        default_factory=datetime.utcnow,
    )

    origins: Optional[List[PropertyOrigin]] = Field(
        None, description="Dictionary for tracking the provenance of properties"
    )

    warnings: List[str] = Field([], description="Any warnings related to this molecule")

    @classmethod
    def from_molecule(
        cls: Type[S], molecule: Molecule, molecule_id: MPculeID, **kwargs
    ) -> S:  # type: ignore[override]
        """
        Builds a molecule document using the minimal amount of information

        ``molecule`` is passed to the parent ``from_molecule`` both as the
        source of the derived metadata (``meta_molecule``) and as the value of
        the ``molecule`` field; ``molecule_id`` and any extra keyword
        arguments are forwarded unchanged.
        """

        return super().from_molecule(  # type: ignore
            meta_molecule=molecule, molecule_id=molecule_id, molecule=molecule, **kwargs
        )

from_molecule(molecule, molecule_id, **kwargs) classmethod

Builds a molecule document using the minimal amount of information

Source code in emmet/core/material.py
183
184
185
186
187
188
189
190
191
192
193
@classmethod
def from_molecule(
    cls: Type[S], molecule: Molecule, molecule_id: MPculeID, **kwargs
) -> S:  # type: ignore[override]
    """
    Builds a molecule document using the minimal amount of information

    ``molecule`` is passed to the parent ``from_molecule`` both as the
    source of the derived metadata (``meta_molecule``) and as the value of
    the ``molecule`` field; ``molecule_id`` and any extra keyword arguments
    are forwarded unchanged.
    """

    return super().from_molecule(  # type: ignore
        meta_molecule=molecule, molecule_id=molecule_id, molecule=molecule, **kwargs
    )

MaterialsDoc

Bases: StructureMetadata

Definition for a core Materials Document

Source code in emmet/core/material.py
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
class MaterialsDoc(StructureMetadata):
    """
    Definition for a core Materials Document

    Extends the structure metadata with the structure itself, its ID,
    deprecation state, contributing tasks, timestamps, provenance and
    warnings.
    """

    # Only material_id is required for all documents
    material_id: MPID = Field(
        ...,
        description="The Materials Project ID of the material, used as a universal reference across property documents. "
        "This comes in the form: mp-******.",
    )

    structure: Structure = Field(
        ...,
        description="The structure of this material.",
    )

    # Documents are deprecated unless explicitly marked otherwise.
    deprecated: bool = Field(
        True,
        description="Whether this materials document is deprecated.",
    )

    deprecation_reasons: Optional[List[Union[DeprecationMessage, str]]] = Field(
        None,
        description="List of deprecation tags detailing why this materials document isn't valid.",
    )

    initial_structures: List[Structure] = Field(
        [],
        description="Initial structures used in the DFT optimizations corresponding to this material.",
    )

    task_ids: List[MPID] = Field(
        [],
        description="List of Calculations IDs used to make this Materials Document.",
    )

    deprecated_tasks: List[str] = Field([], title="Deprecated Tasks")

    calc_types: Optional[Mapping[str, str]] = Field(
        None,
        description="Calculation types for all the calculations that make up this material.",
    )

    last_updated: datetime = Field(
        description="Timestamp for when this document was last updated.",
        default_factory=datetime.utcnow,
    )

    created_at: datetime = Field(
        description="Timestamp for when this material document was first created.",
        default_factory=datetime.utcnow,
    )

    origins: Optional[List[PropertyOrigin]] = Field(
        None, description="Dictionary for tracking the provenance of properties."
    )

    warnings: List[str] = Field(
        [], description="Any warnings related to this material."
    )

    @classmethod
    def from_structure(
        cls: Type[T], structure: Structure, material_id: MPID, **kwargs
    ) -> T:  # type: ignore[override]
        """
        Builds a materials document using the minimal amount of information

        ``structure`` is passed to the parent ``from_structure`` both as the
        source of the derived metadata (``meta_structure``) and as the value
        of the ``structure`` field; ``material_id`` and any extra keyword
        arguments are forwarded unchanged.
        """

        return super().from_structure(  # type: ignore
            meta_structure=structure,
            material_id=material_id,
            structure=structure,
            **kwargs,
        )

from_structure(structure, material_id, **kwargs) classmethod

Builds a materials document using the minimal amount of information

Source code in emmet/core/material.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
@classmethod
def from_structure(
    cls: Type[T], structure: Structure, material_id: MPID, **kwargs
) -> T:  # type: ignore[override]
    """
    Builds a materials document using the minimal amount of information

    ``structure`` is passed to the parent ``from_structure`` both as the
    source of the derived metadata (``meta_structure``) and as the value of
    the ``structure`` field; ``material_id`` and any extra keyword arguments
    are forwarded unchanged.
    """

    return super().from_structure(  # type: ignore
        meta_structure=structure,
        material_id=material_id,
        structure=structure,
        **kwargs,
    )

PropertyOrigin

Bases: BaseModel

Provenance document for the origin of properties in a material document

Source code in emmet/core/material.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class PropertyOrigin(BaseModel):
    """
    Provenance document for the origin of properties in a material document

    Ties a property name to the ID of the calculation it came from and the
    time that calculation was last updated.
    """

    name: str = Field(..., description="The property name")
    task_id: Union[MPID, MPculeID] = Field(
        ..., description="The calculation ID this property comes from"
    )
    # When no value is supplied, the default is produced by datetime.utcnow.
    last_updated: datetime = Field(  # type: ignore
        description="The timestamp when this calculation was last updated",
        default_factory=datetime.utcnow,
    )

    @field_validator("last_updated", mode="before")
    @classmethod
    def handle_datetime(cls, v):
        """Pre-process the raw ``last_updated`` input via ``convert_datetime``."""
        # Registered with mode="before", so this receives the raw input value.
        return convert_datetime(cls, v)

Core definition of a Thermo Document

DecompositionProduct

Bases: BaseModel

Entry metadata for a decomposition process

Source code in emmet/core/thermo.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class DecompositionProduct(BaseModel):
    """
    Entry metadata for a decomposition process

    Describes one product of a decomposition: the ID and formula of the
    product material and the amount of it that is formed.
    """

    material_id: Optional[MPID] = Field(
        None,
        description="The Materials Project ID for the material this decomposition points to.",
    )
    formula: Optional[str] = Field(
        None,
        description="The formula of the decomposed material this material decomposes to.",
    )
    # Description typo fixed ("this this material" -> "this material").
    amount: Optional[float] = Field(
        None,
        description="The amount of the decomposed material by formula units this material decomposes to.",
    )

PhaseDiagramDoc

Bases: BaseModel

A phase diagram document

Source code in emmet/core/thermo.py
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
class PhaseDiagramDoc(BaseModel):
    """
    A phase diagram document

    Stores one phase diagram together with the chemical system and thermo
    type that identify it.
    """

    # Fixed property label for documents of this kind.
    property_name: str = "phase_diagram"

    phase_diagram_id: str = Field(
        ...,
        description="Phase diagram ID consisting of the chemical system and thermo type",
    )

    chemsys: str = Field(
        ...,
        description="Dash-delimited string of elements in the material",
    )

    thermo_type: Union[ThermoType, RunType] = Field(
        ...,
        description="Functional types of calculations involved in the energy mixing scheme.",
    )

    phase_diagram: PhaseDiagram = Field(
        ...,
        description="Phase diagram for the chemical system.",
    )

    # When no value is supplied, the default is produced by datetime.utcnow.
    last_updated: datetime = Field(
        description="Timestamp for the most recent calculation update for this property",
        default_factory=datetime.utcnow,
    )

ThermoDoc

Bases: PropertyDoc

A thermo entry document

Source code in emmet/core/thermo.py
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
class ThermoDoc(PropertyDoc):
    """
    A thermo entry document
    """

    property_name: str = "thermo"

    thermo_type: Union[ThermoType, RunType] = Field(
        ...,
        description="Functional types of calculations involved in the energy mixing scheme.",
    )

    thermo_id: str = Field(
        ...,
        description="Unique document ID which is composed of the Material ID and thermo data type.",
    )

    uncorrected_energy_per_atom: float = Field(
        ..., description="The total DFT energy of this material per atom in eV/atom."
    )

    energy_per_atom: float = Field(
        ...,
        description="The total corrected DFT energy of this material per atom in eV/atom.",
    )

    # NOTE(review): the field name is misspelled ("uncertainy") but it is part of
    # the public schema, so it is left unchanged here.
    energy_uncertainy_per_atom: Optional[float] = Field(None, description="")

    formation_energy_per_atom: Optional[float] = Field(
        None, description="The formation energy per atom in eV/atom."
    )

    energy_above_hull: float = Field(
        ..., description="The energy above the hull in eV/Atom."
    )

    is_stable: bool = Field(
        False,
        description="Flag for whether this material is on the hull and therefore stable.",
    )

    equilibrium_reaction_energy_per_atom: Optional[float] = Field(
        None,
        description="The reaction energy of a stable entry from the neighboring equilibrium stable materials in eV."
        " Also known as the inverse distance to hull.",
    )

    decomposes_to: Optional[List[DecompositionProduct]] = Field(
        None,
        description="List of decomposition data for this material. Only valid for metastable or unstable material.",
    )

    decomposition_enthalpy: Optional[float] = Field(
        None,
        description="Decomposition enthalpy as defined by `get_decomp_and_phase_separation_energy` in pymatgen.",
    )

    decomposition_enthalpy_decomposes_to: Optional[List[DecompositionProduct]] = Field(
        None,
        description="List of decomposition data associated with the decomposition_enthalpy quantity.",
    )

    energy_type: str = Field(
        ...,
        description="The type of calculation this energy evaluation comes from.",
    )

    entry_types: List[str] = Field(
        description="List of available energy types computed for this material."
    )

    entries: Dict[str, Union[ComputedEntry, ComputedStructureEntry]] = Field(
        ...,
        description="List of all entries that are valid for this material."
        " The keys for this dictionary are names of various calculation types.",
    )

    @classmethod
    def from_entries(
        cls,
        entries: List[Union[ComputedEntry, ComputedStructureEntry]],
        thermo_type: Union[ThermoType, RunType],
        phase_diagram: Optional[PhaseDiagram] = None,
        use_max_chemsys: bool = False,
        **kwargs
    ):
        """Produce a list of thermo documents from a list of Entry objects

        Entries are grouped by ``entry.data["material_id"]``; one document is
        built per group from the group's preferred ("blessed") entry.

        Args:
            entries (List[Union[ComputedEntry, ComputedStructureEntry]]): List of Entry objects
            thermo_type (Union[ThermoType, RunType]): Thermo type
            phase_diagram (Optional[PhaseDiagram], optional): Already built phase diagram. Defaults to None,
                in which case one is built from ``entries`` with ``construct_phase_diagram``.
            use_max_chemsys (bool, optional): Whether to only produce thermo docs for materials
                that match the largest chemsys represented in the list. Defaults to False.
            **kwargs: Extra keyword arguments forwarded to ``from_structure``.

        Returns:
            List[ThermoDoc]: List of built thermo doc objects (instances of ``cls``).
        """

        pd = phase_diagram or cls.construct_phase_diagram(entries)

        chemsys = "-".join(sorted([str(e) for e in pd.elements]))

        docs = []

        entries_by_mpid = defaultdict(list)
        for e in entries:
            entries_by_mpid[e.data["material_id"]].append(e)

        entry_quality_scores = {"GGA": 1, "GGA+U": 2, "SCAN": 3, "r2SCAN": 4}

        def _energy_eval(entry: Union[ComputedStructureEntry, ComputedEntry]):
            """
            Helper function to order entries for thermo energy data selection
            - Run type
            - LASPH
            - Energy
            """

            # Scores are negated so that the best entry sorts first (ascending sort).
            return (
                -1 * entry_quality_scores.get(entry.data["run_type"], 0),
                -1 * int(entry.data.get("aspherical", False)),
                entry.energy,
            )

        def _as_products(decomposition):
            """Turn an {entry: amount} decomposition mapping into a list of product dicts."""
            return [
                {
                    "material_id": de.data["material_id"],  # type: ignore[union-attr]
                    "formula": de.composition.formula,
                    "amount": amt,
                }
                for de, amt in decomposition.items()  # type: ignore[union-attr]
            ]

        for material_id, entry_group in entries_by_mpid.items():
            if (
                use_max_chemsys
                and entry_group[0].composition.chemical_system != chemsys
            ):
                continue

            # min() returns the first minimal element, i.e. the same entry a
            # stable sort would have placed at index 0.
            blessed_entry = min(entry_group, key=_energy_eval)

            (decomp, ehull) = pd.get_decomp_and_e_above_hull(blessed_entry)  # type: ignore[arg-type]

            builder_meta = EmmetMeta(license=blessed_entry.data.get("license"))

            num_atoms = blessed_entry.composition.num_atoms

            d = {
                "thermo_id": "{}_{}".format(material_id, str(thermo_type)),
                "material_id": material_id,
                "thermo_type": thermo_type,
                "uncorrected_energy_per_atom": blessed_entry.uncorrected_energy
                / num_atoms,
                "energy_per_atom": blessed_entry.energy / num_atoms,
                "formation_energy_per_atom": pd.get_form_energy_per_atom(blessed_entry),  # type: ignore[arg-type]
                "energy_above_hull": ehull,
                "is_stable": blessed_entry in pd.stable_entries,
                "builder_meta": builder_meta.model_dump(),
            }

            # Uncomment to make last_updated line up with materials.
            # if "last_updated" in blessed_entry.data:
            #     d["last_updated"] = blessed_entry.data["last_updated"]

            # Store different info if stable vs decomposes
            if d["is_stable"]:
                d[
                    "equilibrium_reaction_energy_per_atom"
                ] = pd.get_equilibrium_reaction_energy(
                    blessed_entry  # type: ignore[arg-type]
                )
            else:
                d["decomposes_to"] = _as_products(decomp)

            try:
                separation_decomp, energy = pd.get_decomp_and_phase_separation_energy(
                    blessed_entry  # type: ignore[arg-type]
                )
                d["decomposition_enthalpy"] = energy
                d["decomposition_enthalpy_decomposes_to"] = _as_products(
                    separation_decomp
                )
            except ValueError:
                # try/except so this quantity does not take down the builder if it fails:
                # it includes an optimization step that can be fragile in some instances,
                # most likely failure is ValueError, "invalid value encountered in true_divide"
                d["warnings"] = [
                    "Could not calculate decomposition enthalpy for this entry."
                ]

            d["energy_type"] = blessed_entry.parameters.get("run_type", "Unknown")
            d["entry_types"] = []
            d["entries"] = {}

            # Currently, each entry group contains a single entry due to how the compatibility scheme works
            for entry in entry_group:
                run_type = entry.parameters.get("run_type", "Unknown")
                d["entry_types"].append(run_type)
                d["entries"][run_type] = entry

            d["origins"] = [
                PropertyOrigin(
                    name="energy",
                    task_id=blessed_entry.data["task_id"],
                    last_updated=d.get("last_updated", datetime.utcnow()),
                )
            ]

            # Build through ``cls`` (not the hard-coded ThermoDoc) so that
            # subclasses calling this constructor get instances of themselves.
            docs.append(
                cls.from_structure(
                    meta_structure=blessed_entry.structure, **d, **kwargs  # type: ignore[attr-defined]
                )
            )

        return docs

    @staticmethod
    def construct_phase_diagram(entries) -> PhaseDiagram:
        """
        Efficiently construct a phase diagram using only the lowest-energy entry at every
        composition represented in the entry data passed.

        Args:
            entries (List[ComputedStructureEntry]): List of corrected pymatgen entry objects.

        Returns:
            PhaseDiagram: Pymatgen PhaseDiagram object
        """
        entries_by_comp = defaultdict(list)
        for e in entries:
            entries_by_comp[e.composition.reduced_formula].append(e)

        # Only use lowest entry per composition to speed up QHull in Phase Diagram
        # (min() picks the first lowest-energy entry without sorting the whole group).
        reduced_entries = [
            min(comp_entries, key=lambda e: e.energy_per_atom)
            for comp_entries in entries_by_comp.values()
        ]
        pd = PhaseDiagram(reduced_entries)

        # Add back all entries, not just those on the hull
        pd_computed_data = pd.computed_data
        pd_computed_data["all_entries"] = entries
        new_pd = PhaseDiagram(
            entries, elements=pd.elements, computed_data=pd_computed_data
        )
        return new_pd

construct_phase_diagram(entries) staticmethod

Efficiently construct a phase diagram using only the lowest-energy entry at every composition represented in the entry data passed.

Parameters:

Name Type Description Default
entries List[ComputedStructureEntry]

List of corrected pymatgen entry objects.

required

Returns:

Name Type Description
PhaseDiagram PhaseDiagram

Pymatgen PhaseDiagram object

Source code in emmet/core/thermo.py
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
@staticmethod
def construct_phase_diagram(entries) -> PhaseDiagram:
    """
    Efficiently construct a phase diagram using only the lowest-energy entry at every
    composition represented in the entry data passed.

    Args:
        entries (List[ComputedStructureEntry]): List of corrected pymatgen entry objects.

    Returns:
        PhaseDiagram: Pymatgen PhaseDiagram object
    """
    entries_by_comp = defaultdict(list)
    for e in entries:
        entries_by_comp[e.composition.reduced_formula].append(e)

    # Only use lowest entry per composition to speed up QHull in Phase Diagram
    # (min() picks the first lowest-energy entry without sorting the whole group).
    reduced_entries = [
        min(comp_entries, key=lambda e: e.energy_per_atom)
        for comp_entries in entries_by_comp.values()
    ]
    pd = PhaseDiagram(reduced_entries)

    # Add back all entries, not just those on the hull
    pd_computed_data = pd.computed_data
    pd_computed_data["all_entries"] = entries
    new_pd = PhaseDiagram(
        entries, elements=pd.elements, computed_data=pd_computed_data
    )
    return new_pd

from_entries(entries, thermo_type, phase_diagram=None, use_max_chemsys=False, **kwargs) classmethod

Produce a list of ThermoDocs from a list of Entry objects

Parameters:

Name Type Description Default
entries List[Union[ComputedEntry, ComputedStructureEntry]]

List of Entry objects

required
thermo_type Union[ThermoType, RunType]

Thermo type

required
phase_diagram Optional[PhaseDiagram]

Already built phase diagram. Defaults to None.

None
use_max_chemsys bool

Whether to only produce thermo docs for materials that match the largest chemsys represented in the list. Defaults to False.

False

Returns:

Type Description

List[ThermoDoc]: List of built thermo doc objects.

Source code in emmet/core/thermo.py
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
@classmethod
def from_entries(
    cls,
    entries: List[Union[ComputedEntry, ComputedStructureEntry]],
    thermo_type: Union[ThermoType, RunType],
    phase_diagram: Optional[PhaseDiagram] = None,
    use_max_chemsys: bool = False,
    **kwargs
):
    """Produce a list of thermo documents from a list of Entry objects

    Entries are grouped by ``entry.data["material_id"]``; one document is
    built per group from the group's preferred ("blessed") entry.

    Args:
        entries (List[Union[ComputedEntry, ComputedStructureEntry]]): List of Entry objects
        thermo_type (Union[ThermoType, RunType]): Thermo type
        phase_diagram (Optional[PhaseDiagram], optional): Already built phase diagram. Defaults to None,
            in which case one is built from ``entries`` with ``construct_phase_diagram``.
        use_max_chemsys (bool, optional): Whether to only produce thermo docs for materials
            that match the largest chemsys represented in the list. Defaults to False.
        **kwargs: Extra keyword arguments forwarded to ``from_structure``.

    Returns:
        List[ThermoDoc]: List of built thermo doc objects (instances of ``cls``).
    """

    pd = phase_diagram or cls.construct_phase_diagram(entries)

    chemsys = "-".join(sorted([str(e) for e in pd.elements]))

    docs = []

    entries_by_mpid = defaultdict(list)
    for e in entries:
        entries_by_mpid[e.data["material_id"]].append(e)

    entry_quality_scores = {"GGA": 1, "GGA+U": 2, "SCAN": 3, "r2SCAN": 4}

    def _energy_eval(entry: Union[ComputedStructureEntry, ComputedEntry]):
        """
        Helper function to order entries for thermo energy data selection
        - Run type
        - LASPH
        - Energy
        """

        # Scores are negated so that the best entry sorts first (ascending sort).
        return (
            -1 * entry_quality_scores.get(entry.data["run_type"], 0),
            -1 * int(entry.data.get("aspherical", False)),
            entry.energy,
        )

    def _as_products(decomposition):
        """Turn an {entry: amount} decomposition mapping into a list of product dicts."""
        return [
            {
                "material_id": de.data["material_id"],  # type: ignore[union-attr]
                "formula": de.composition.formula,
                "amount": amt,
            }
            for de, amt in decomposition.items()  # type: ignore[union-attr]
        ]

    for material_id, entry_group in entries_by_mpid.items():
        if (
            use_max_chemsys
            and entry_group[0].composition.chemical_system != chemsys
        ):
            continue

        # min() returns the first minimal element, i.e. the same entry a
        # stable sort would have placed at index 0.
        blessed_entry = min(entry_group, key=_energy_eval)

        (decomp, ehull) = pd.get_decomp_and_e_above_hull(blessed_entry)  # type: ignore[arg-type]

        builder_meta = EmmetMeta(license=blessed_entry.data.get("license"))

        num_atoms = blessed_entry.composition.num_atoms

        d = {
            "thermo_id": "{}_{}".format(material_id, str(thermo_type)),
            "material_id": material_id,
            "thermo_type": thermo_type,
            "uncorrected_energy_per_atom": blessed_entry.uncorrected_energy
            / num_atoms,
            "energy_per_atom": blessed_entry.energy / num_atoms,
            "formation_energy_per_atom": pd.get_form_energy_per_atom(blessed_entry),  # type: ignore[arg-type]
            "energy_above_hull": ehull,
            "is_stable": blessed_entry in pd.stable_entries,
            "builder_meta": builder_meta.model_dump(),
        }

        # Uncomment to make last_updated line up with materials.
        # if "last_updated" in blessed_entry.data:
        #     d["last_updated"] = blessed_entry.data["last_updated"]

        # Store different info if stable vs decomposes
        if d["is_stable"]:
            d[
                "equilibrium_reaction_energy_per_atom"
            ] = pd.get_equilibrium_reaction_energy(
                blessed_entry  # type: ignore[arg-type]
            )
        else:
            d["decomposes_to"] = _as_products(decomp)

        try:
            separation_decomp, energy = pd.get_decomp_and_phase_separation_energy(
                blessed_entry  # type: ignore[arg-type]
            )
            d["decomposition_enthalpy"] = energy
            d["decomposition_enthalpy_decomposes_to"] = _as_products(
                separation_decomp
            )
        except ValueError:
            # try/except so this quantity does not take down the builder if it fails:
            # it includes an optimization step that can be fragile in some instances,
            # most likely failure is ValueError, "invalid value encountered in true_divide"
            d["warnings"] = [
                "Could not calculate decomposition enthalpy for this entry."
            ]

        d["energy_type"] = blessed_entry.parameters.get("run_type", "Unknown")
        d["entry_types"] = []
        d["entries"] = {}

        # Currently, each entry group contains a single entry due to how the compatibility scheme works
        for entry in entry_group:
            run_type = entry.parameters.get("run_type", "Unknown")
            d["entry_types"].append(run_type)
            d["entries"][run_type] = entry

        d["origins"] = [
            PropertyOrigin(
                name="energy",
                task_id=blessed_entry.data["task_id"],
                last_updated=d.get("last_updated", datetime.utcnow()),
            )
        ]

        # Build through ``cls`` (not the hard-coded ThermoDoc) so that
        # subclasses calling this constructor get instances of themselves.
        docs.append(
            cls.from_structure(
                meta_structure=blessed_entry.structure, **d, **kwargs  # type: ignore[attr-defined]
            )
        )

    return docs

Core definition of a Provenance Document

Author

Bases: BaseModel

Author information

Source code in emmet/core/provenance.py
27
28
29
30
31
32
33
class Author(BaseModel):
    """
    Contact details of an author; both the name and the e-mail are optional.
    """

    # Both fields fall back to None when not supplied.
    name: Optional[str] = Field(default=None)
    email: Optional[str] = Field(default=None)

Database

Bases: ValueEnum

Database identifiers for provenance IDs

Source code in emmet/core/provenance.py
17
18
19
20
21
22
23
24
class Database(ValueEnum):
    """
    Database identifiers for provenance IDs
    """

    # Each member maps a source database to a short lowercase string tag.
    # NOTE(review): presumably these tags are matched against SNL IDs by
    # callers - confirm before changing any value.
    ICSD = "icsd"
    Pauling_Files = "pf"
    COD = "cod"

History

Bases: BaseModel

History of the material provenance

Source code in emmet/core/provenance.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
class History(BaseModel):
    """
    History of the material provenance
    """

    name: str
    url: str
    description: Optional[Dict] = Field(
        None, description="Dictionary of extra data for this history node."
    )

    @model_validator(mode="before")
    @classmethod
    def str_to_dict(cls, values):
        """
        Wrap a plain-string ``description`` into ``{"string": <description>}``.

        Runs before field validation, so ``values`` is the raw input. Anything
        that is not a dict (for example an already-built ``History`` instance)
        is returned untouched instead of failing on ``.get``.
        """
        if not isinstance(values, dict):
            return values

        description = values.get("description")
        if isinstance(description, str):
            # Build a new dict rather than mutating the caller's input in place.
            values = {**values, "description": {"string": description}}
        return values

ProvenanceDoc

Bases: PropertyDoc

A provenance property block

Source code in emmet/core/provenance.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
class ProvenanceDoc(PropertyDoc):
    """
    A provenance property block
    """

    property_name: str = "provenance"

    created_at: datetime = Field(
        ...,
        description="creation date for the first structure corresponding to this material",
    )

    references: List[str] = Field(
        [], description="Bibtex reference strings for this material"
    )

    authors: List[Author] = Field([], description="List of authors for this material")

    remarks: List[str] = Field(
        [], description="List of remarks for the provenance of this material"
    )

    tags: List[str] = Field([])

    theoretical: bool = Field(
        True, description="If this material has any experimental provenance or not"
    )

    database_IDs: Dict[Database, List[str]] = Field(
        dict(), description="Database IDs corresponding to this material"
    )

    history: List[History] = Field(
        [],
        description="List of history nodes specifying the transformations or orignation"
        " of this material for the entry closest matching the material input",
    )

    @field_validator("created_at", mode="before")
    @classmethod
    def handle_datetime(cls, v):
        """Normalize the raw ``created_at`` input by delegating to ``convert_datetime``."""
        return convert_datetime(cls, v)

    @field_validator("authors")
    @classmethod
    def remove_duplicate_authors(cls, authors):
        """
        Drop duplicate authors, comparing names case-insensitively.

        For a repeated name the last occurrence wins, at the position of the
        first. Authors without a name (``name`` is optional) are compared by
        e-mail instead, so they no longer raise on ``None.lower()``.
        """
        unique_authors = {}
        for author in authors:
            if author.name is None:
                # Tuple key cannot collide with the plain-string name keys.
                key = (None, author.email)
            else:
                key = author.name.lower()
            unique_authors[key] = author
        return list(unique_authors.values())

    @classmethod
    def from_SNLs(
        cls, material_id: MPID, structure: Structure, snls: List[SNLDict], **kwargs
    ) -> "ProvenanceDoc":
        """
        Converts legacy Pymatgen SNLs into a single provenance document

        Args:
            material_id: ID of the material the SNLs belong to.
            structure: Structure passed to ``from_structure`` as ``meta_structure``.
            snls: Non-empty list of SNLs to merge.
            **kwargs: Extra keyword arguments forwarded to ``from_structure``.

        Returns:
            The document produced by ``from_structure``.

        Raises:
            AssertionError: If ``snls`` is empty.
        """

        assert (
            len(snls) > 0
        ), "Error must provide a non-zero list of SNLs to convert from SNLs"

        # Choose earliest created_at and the history that goes with it.
        # min() returns the first SNL with the earliest timestamp, matching
        # what a stable sort followed by [0] would select.
        earliest_snl = min(snls, key=lambda snl: snl.about.created_at)
        created_at = earliest_snl.about.created_at
        # last_updated = max([snl.about.created_at for snl in snls])
        history = earliest_snl.about.history

        # Aggregate all references into one dict to remove duplicates
        refs = {}
        for snl in snls:
            try:
                set_strict_mode(False)
                entries = parse_string(snl.about.references, bib_format="bibtex")
                refs.update(entries.entries)
            except Exception as e:
                # Best effort: an unparsable bibtex string must not abort the build.
                warnings.warn(
                    f"Failed parsing bibtex: {snl.about.references} due to {e}"
                )

        bib_data = BibliographyData(entries=refs)

        references = [ref.to_string("bibtex") for ref in bib_data.entries.values()]

        # TODO: Maybe we should combine this robocrystallographer?
        # TODO: Refine these tags / remarks
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # resulting lists are reproducible between runs (set order is not).
        remarks = list(
            dict.fromkeys(remark for snl in snls for remark in snl.about.remarks)
        )
        tags = [r for r in remarks if len(r) < 140]

        authors = [entry for snl in snls for entry in snl.about.authors]

        # Check if this entry is experimental
        experimental = any(
            entry.description.get("experimental", False)
            for snl in snls
            for entry in snl.about.history
            if entry.description is not None
        )

        # Aggregate all the database IDs (unique, in first-seen order)
        snl_ids = list(dict.fromkeys(snl.snl_id for snl in snls))
        db_ids = {}
        for db_id in map(str, Database):  # type: ignore
            database = Database(db_id)
            # Keep non-empty SNL IDs that contain the database tag.
            matching_ids = [
                snl_id for snl_id in snl_ids if snl_id and db_id in snl_id
            ]
            # Databases without any matching ID are left out entirely.
            if matching_ids:
                db_ids[database] = matching_ids

        fields = {
            "created_at": created_at,
            "references": references,
            "authors": authors,
            "remarks": remarks,
            "tags": tags,
            "database_IDs": db_ids,
            "theoretical": not experimental,
            "history": history,
        }

        return super().from_structure(
            material_id=material_id, meta_structure=structure, **fields, **kwargs
        )

from_SNLs(material_id, structure, snls, **kwargs) classmethod

Converts legacy Pymatgen SNLs into a single provenance document

Source code in emmet/core/provenance.py
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
@classmethod
def from_SNLs(
    cls, material_id: MPID, structure: Structure, snls: List[SNLDict], **kwargs
) -> "ProvenanceDoc":
    """
    Converts legacy Pymatgen SNLs into a single provenance document

    Args:
        material_id: ID of the material the SNLs belong to.
        structure: Structure passed to ``from_structure`` as ``meta_structure``.
        snls: Non-empty list of SNLs to merge.
        **kwargs: Extra keyword arguments forwarded to ``from_structure``.

    Returns:
        The document produced by ``from_structure``.

    Raises:
        AssertionError: If ``snls`` is empty.
    """

    assert (
        len(snls) > 0
    ), "Error must provide a non-zero list of SNLs to convert from SNLs"

    # Choose earliest created_at and the history that goes with it.
    # min() returns the first SNL with the earliest timestamp, matching
    # what a stable sort followed by [0] would select.
    earliest_snl = min(snls, key=lambda snl: snl.about.created_at)
    created_at = earliest_snl.about.created_at
    # last_updated = max([snl.about.created_at for snl in snls])
    history = earliest_snl.about.history

    # Aggregate all references into one dict to remove duplicates
    refs = {}
    for snl in snls:
        try:
            set_strict_mode(False)
            entries = parse_string(snl.about.references, bib_format="bibtex")
            refs.update(entries.entries)
        except Exception as e:
            # Best effort: an unparsable bibtex string must not abort the build.
            warnings.warn(
                f"Failed parsing bibtex: {snl.about.references} due to {e}"
            )

    bib_data = BibliographyData(entries=refs)

    references = [ref.to_string("bibtex") for ref in bib_data.entries.values()]

    # TODO: Maybe we should combine this robocrystallographer?
    # TODO: Refine these tags / remarks
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # resulting lists are reproducible between runs (set order is not).
    remarks = list(
        dict.fromkeys(remark for snl in snls for remark in snl.about.remarks)
    )
    tags = [r for r in remarks if len(r) < 140]

    authors = [entry for snl in snls for entry in snl.about.authors]

    # Check if this entry is experimental
    experimental = any(
        entry.description.get("experimental", False)
        for snl in snls
        for entry in snl.about.history
        if entry.description is not None
    )

    # Aggregate all the database IDs (unique, in first-seen order)
    snl_ids = list(dict.fromkeys(snl.snl_id for snl in snls))
    db_ids = {}
    for db_id in map(str, Database):  # type: ignore
        database = Database(db_id)
        # Keep non-empty SNL IDs that contain the database tag.
        matching_ids = [snl_id for snl_id in snl_ids if snl_id and db_id in snl_id]
        # Databases without any matching ID are left out entirely.
        if matching_ids:
            db_ids[database] = matching_ids

    fields = {
        "created_at": created_at,
        "references": references,
        "authors": authors,
        "remarks": remarks,
        "tags": tags,
        "database_IDs": db_ids,
        "theoretical": not experimental,
        "history": history,
    }

    return super().from_structure(
        material_id=material_id, meta_structure=structure, **fields, **kwargs
    )

SNLAbout

Bases: BaseModel

A data dictionary defining extra fields in a SNL

Source code in emmet/core/provenance.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
class SNLAbout(BaseModel):
    """Extra metadata fields carried by an SNL; every field has a default."""

    references: str = Field(
        default="",
        description="Bibtex reference strings for this material.",
    )

    authors: List[Author] = Field(
        default=[],
        description="List of authors for this material.",
    )

    remarks: List[str] = Field(
        default=[],
        description="List of remarks for the provenance of this material.",
    )

    tags: List[str] = Field(default=[])

    database_IDs: Dict[Database, List[str]] = Field(
        default={},
        description="Database IDs corresponding to this material.",
    )

    history: List[History] = Field(
        default=[],
        description="List of history nodes specifying the transformations or orignation"
        " of this material for the entry closest matching the material input.",
    )

    # Filled with the current UTC time when not supplied.
    created_at: datetime = Field(
        description="The creation date for this SNL.",
        default_factory=datetime.utcnow,
    )

    @field_validator("created_at", mode="before")
    @classmethod
    def handle_datetime(cls, v):
        """Pre-process the raw ``created_at`` input by delegating to ``convert_datetime``."""
        converted = convert_datetime(cls, v)
        return converted

SNLDict

Bases: BaseModel

Pydantic validated dictionary for SNL

Source code in emmet/core/provenance.py
90
91
92
93
94
95
class SNLDict(BaseModel):
    """Validated container pairing an SNL's ``about`` block with its ID."""

    # Required nested metadata block.
    about: SNLAbout

    # Required identifier string.
    snl_id: str = Field(
        default=...,
        description="The SNL ID for this entry",
    )

Core definition for Polar property Document

BornEffectiveCharges

Bases: BaseModel

A block for the Born effective charges

Source code in emmet/core/polar.py
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
class BornEffectiveCharges(BaseModel):
    """
    A block for the Born effective charges
    """

    # All fields are optional and default to None.
    value: Optional[List[Matrix3D]] = Field(
        None, description="Value of the Born effective charges."
    )

    # The two adjacent literals below are concatenated; the trailing space after
    # "the" keeps the words from running together ("thecharge").
    symmetrized_value: Optional[List[Matrix3D]] = Field(
        None,
        description="Value of the Born effective charges after symmetrization to obey the "
        "charge neutrality sum rule.",
    )

    cnsr_break: Optional[float] = Field(
        None,
        description="The maximum breaking of the charge neutrality sum "
        "rule (CNSR) in the Born effective charges.",
    )

DielectricDoc

Bases: PropertyDoc

A dielectric property block

Source code in emmet/core/polar.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
class DielectricDoc(PropertyDoc):
    """
    Property document describing the dielectric response of a material.
    """

    property_name: str = "dielectric"

    # Tensor-valued fields.
    total: Matrix3D = Field(description="Total dielectric tensor.")
    ionic: Matrix3D = Field(description="Ionic contribution to dielectric tensor.")
    electronic: Matrix3D = Field(
        description="Electronic contribution to dielectric tensor."
    )

    # Scalar fields: each is the mean of the matching tensor's diagonal
    # when built through from_ionic_and_electronic.
    e_total: float = Field(description="Total electric permittivity.")
    e_ionic: float = Field(
        description="Electric permittivity from atomic rearrangement."
    )
    e_electronic: float = Field(
        description="Electric permittivity due to electrons rearrangement."
    )

    n: float = Field(description="Refractive index.")

    @classmethod
    def from_ionic_and_electronic(
        cls,
        material_id: MPID,
        ionic: Matrix3D,
        electronic: Matrix3D,
        structure: Structure,
        **kwargs,
    ):
        """
        Build the document from the ionic and electronic dielectric tensors.

        Both inputs are converted with ``Tensor.convert_to_ieee(structure)``;
        the total tensor is their sum. Each scalar permittivity is the average
        of the diagonal of the corresponding tensor, and ``n`` is the square
        root of the electronic one. Extra ``kwargs`` go to ``from_structure``.
        """
        ieee_ionic = Tensor(ionic).convert_to_ieee(structure)
        ieee_electronic = Tensor(electronic).convert_to_ieee(structure)
        ieee_total = ieee_ionic + ieee_electronic

        # Mean of the diagonal elements of each tensor.
        mean_total = np.average(np.diagonal(ieee_total))
        mean_ionic = np.average(np.diagonal(ieee_ionic))
        mean_electronic = np.average(np.diagonal(ieee_electronic))

        return super().from_structure(
            meta_structure=structure,
            material_id=material_id,
            total=ieee_total.tolist(),
            ionic=ieee_ionic.tolist(),
            electronic=ieee_electronic.tolist(),
            e_total=mean_total,
            e_ionic=mean_ionic,
            e_electronic=mean_electronic,
            n=np.sqrt(mean_electronic),
            **kwargs,
        )

IRDielectric

Bases: BaseModel

A block for the pymatgen IRDielectricTensor object

Source code in emmet/core/polar.py
163
164
165
166
167
168
169
170
class IRDielectric(BaseModel):
    """
    Container for a serialized pymatgen IRDielectricTensor object.
    """

    # Optional; None when no tensor is stored.
    ir_dielectric_tensor: Optional[dict] = Field(
        default=None,
        description="Serialized version of a pymatgen IRDielectricTensor object.",
    )

PiezoelectricDoc

Bases: PropertyDoc

A piezoelectric property block

Source code in emmet/core/polar.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
class PiezoelectricDoc(PropertyDoc):
    """
    A piezoelectric package block

    Holds the total, ionic and electronic piezoelectric tensors (stored in
    Voigt form after conversion to the IEEE orientation) together with the
    largest piezoelectric response found by a singular value decomposition of
    the total tensor.
    """

    property_name: str = "piezoelectric"

    total: PiezoTensor = Field(description="Total piezoelectric tensor in C/m²")
    ionic: PiezoTensor = Field(
        description="Ionic contribution to piezoelectric tensor in C/m²"
    )
    electronic: PiezoTensor = Field(
        description="Electronic contribution to piezoelectric tensor in C/m²"
    )

    e_ij_max: float = Field(description="Piezoelectric modulus")
    max_direction: List[int] = Field(
        description="Miller direction for maximum piezo response"
    )
    strain_for_max: List[float] = Field(
        description="Normalized strain direction for maximum piezo repsonse"
    )

    @classmethod
    def from_ionic_and_electronic(
        cls,
        material_id: MPID,
        ionic: PiezoTensor,
        electronic: PiezoTensor,
        structure: Structure,
        **kwargs,
    ):
        """
        Build the document from ionic and electronic piezoelectric tensors.

        Args:
            material_id: Identifier forwarded to ``from_structure``.
            ionic: Ionic contribution, parsed with
                ``BasePiezoTensor.from_vasp_voigt``.
            electronic: Electronic contribution, parsed the same way.
            structure: Structure used for the IEEE conversion and forwarded
                to ``from_structure`` as ``meta_structure``.
            **kwargs: Extra fields forwarded unchanged to ``from_structure``.

        Returns:
            Whatever the parent ``from_structure`` classmethod returns.
        """
        ionic_tensor = BasePiezoTensor.from_vasp_voigt(ionic)
        electronic_tensor = BasePiezoTensor.from_vasp_voigt(electronic)
        total: BasePiezoTensor = ionic_tensor + electronic_tensor  # type: ignore[assignment]

        # Convert every tensor to the IEEE orientation for this structure
        total = total.convert_to_ieee(structure)
        ionic_tensor = ionic_tensor.convert_to_ieee(structure)
        electronic_tensor = electronic_tensor.convert_to_ieee(structure)

        # total.voigt = directions @ diag(charges) @ strains
        directions, charges, strains = np.linalg.svd(total.voigt, full_matrices=False)
        max_index = np.argmax(np.abs(charges))

        # numpy returns the left singular vectors as the COLUMNS of the first
        # factor and the right singular vectors as the ROWS of the last one, so
        # the direction paired with charges[max_index] is a column slice while
        # the matching strain (below) is a row.
        max_direction = directions[:, max_index]

        # Normalise by the smallest component that is still larger than
        # max / SETTINGS.MAX_PIEZO_MILLER, so that the largest resulting Miller
        # index stays below SETTINGS.MAX_PIEZO_MILLER.
        min_val = np.abs(max_direction)
        min_val = min_val[min_val > (np.max(min_val) / SETTINGS.MAX_PIEZO_MILLER)]
        min_val = np.min(min_val)

        return super().from_structure(
            meta_structure=structure,
            material_id=material_id,
            **{
                "total": total.zeroed().voigt.tolist(),
                "ionic": ionic_tensor.zeroed().voigt.tolist(),
                "electronic": electronic_tensor.zeroed().voigt.tolist(),
                "e_ij_max": charges[max_index],
                "max_direction": tuple(np.round(max_direction / min_val)),
                "strain_for_max": tuple(strains[max_index]),
            },
            **kwargs,
        )