Source code for atomate2.common.flows.defect
"""Flows used in the calculation of defect properties."""
from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import TYPE_CHECKING
from jobflow import Flow, Job, Maker, OutputReference
from atomate2.common.jobs.defect import (
bulk_supercell_calculation,
get_ccd_documents,
get_charged_structures,
get_defect_entry,
get_supercell_from_prv_calc,
spawn_defect_q_jobs,
spawn_energy_curve_calcs,
)
if TYPE_CHECKING:
from pathlib import Path
import numpy.typing as npt
from emmet.core.tasks import TaskDoc
from pymatgen.analysis.defects.core import Defect
from pymatgen.core.structure import Structure
from pymatgen.entries.computed_entries import ComputedStructureEntry
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Default value of `ConfigurationCoordinateMaker.distortions`: the distortions,
# expressed as fractions of ΔQ, sampled for the configuration coordinate diagram.
DEFAULT_DISTORTIONS = (-1, -0.15, -0.1, -0.05, 0, 0.05, 0.1, 0.15, 1)
[docs]
@dataclass
class ConfigurationCoordinateMaker(Maker):
    """Maker to generate a configuration coordinate diagram.

    Parameters
    ----------
    name: str
        The name of the flow created by this maker.
    relax_maker: Maker
        A maker to perform an atomic-position-only relaxation on the defect charge
        states.
    static_maker: Maker
        A maker to perform the single-shot static calculation of the distorted
        structures.
    distortions: tuple[float, ...]
        The distortions, as a fraction of ΔQ, to use in the calculation of the
        configuration coordinate diagram.
    """

    relax_maker: Maker
    static_maker: Maker
    name: str = "config coordinate"
    distortions: tuple[float, ...] = DEFAULT_DISTORTIONS

    def make(
        self,
        structure: Structure,
        charge_state1: int,
        charge_state2: int,
    ) -> Flow:
        """Make a flow for the calculation of the configuration coordinate diagram.

        Parameters
        ----------
        structure
            A structure.
        charge_state1
            The reference charge state of the defect.
        charge_state2
            The excited charge state of the defect

        Returns
        -------
        Flow
            The full workflow for the calculation of the configuration coordinate
            diagram.
        """
        # Fall back to the plain maker name so that `name` is always bound, even
        # when `structure` is an OutputReference and no formula is available yet.
        name = self.name
        # use a more descriptive name when possible
        if not isinstance(structure, OutputReference):
            name = f"{self.name}: {structure.formula}"
            charges_are_known = not (
                isinstance(charge_state1, OutputReference)
                or isinstance(charge_state2, OutputReference)
            )
            if charges_are_known:
                name = (
                    f"{self.name}: {structure.formula}({charge_state1}-{charge_state2})"
                )

        # need to wrap this up in a job so that references to undone calculations can
        # be passed in
        charged_structures = get_charged_structures(
            structure, [charge_state1, charge_state2]
        )

        relax1: Job = self.relax_maker.make(structure=charged_structures.output[0])
        relax2: Job = self.relax_maker.make(structure=charged_structures.output[1])
        relax1.append_name(" q1")
        relax2.append_name(" q2")

        dir1 = relax1.output.dir_name
        dir2 = relax2.output.dir_name
        struct1 = relax1.output.structure
        struct2 = relax2.output.structure

        # Each set of distorted calculations records the uuid of the relaxation it
        # starts from ("relaxed") and of the relaxation it distorts towards.
        add_info1 = {"relaxed_uuid": relax1.uuid, "distorted_uuid": relax2.uuid}
        add_info2 = {"relaxed_uuid": relax2.uuid, "distorted_uuid": relax1.uuid}

        deformations1, deformations2, ccd_job = self.get_deformation_and_ccd_jobs(
            struct1, struct2, dir1, dir2, add_info1, add_info2
        )

        return Flow(
            jobs=[
                charged_structures,
                relax1,
                relax2,
                deformations1,
                deformations2,
                ccd_job,
            ],
            output=ccd_job.output,
            name=name,
        )

    def make_from_relaxed_structures(
        self,
        structure1: Structure,
        structure2: Structure,
    ) -> Flow:
        """Make a flow for the configuration coordinate diagram from relaxed inputs.

        No relaxation jobs are created here; only the distorted calculations and
        the CCD document job are added to the flow.

        Parameters
        ----------
        structure1
            The relaxed structure for charge state 1.
        structure2
            The relaxed structure for charge state 2.

        Returns
        -------
        Flow
            The full workflow for the calculation of the configuration coordinate
            diagram.
        """
        # Fall back to the plain maker name so that `name` is always bound, even
        # when `structure1` is an OutputReference.
        name = self.name
        # use a more descriptive name when possible
        if not isinstance(structure1, OutputReference):
            name = f"{self.name}: {structure1.formula}"
            if not isinstance(structure2, OutputReference):
                # Both halves must be f-strings; otherwise the charge placeholders
                # would appear verbatim in the flow name.
                name = (
                    f"{self.name}: {structure1.formula}"
                    f"({structure1.charge}-{structure2.charge})"
                )

        deformations1, deformations2, ccd_job = self.get_deformation_and_ccd_jobs(
            structure1, structure2
        )

        return Flow(
            jobs=[
                deformations1,
                deformations2,
                ccd_job,
            ],
            output=ccd_job.output,
            name=name,
        )

    def get_deformation_and_ccd_jobs(
        self,
        struct1: Structure,
        struct2: Structure,
        dir1: str | None = None,
        dir2: str | None = None,
        add_info1: dict | None = None,
        add_info2: dict | None = None,
    ) -> tuple[Job, Job, Job]:
        """Get the deformation and CCD jobs for the given structures.

        Parameters
        ----------
        struct1: Structure
            The first structure.
        struct2: Structure
            The second structure.
        dir1: str
            The directory of the first structure.
        dir2: str
            The directory of the second structure.
        add_info1: dict
            Additional information to write
        add_info2: dict
            Additional information to write

        Returns
        -------
        deformations1: Job
            The deformation job for the first structure.
        deformations2: Job
            The deformation job for the second structure.
        ccd_job: Job
            The Job to construct the CCD document.
        """
        deformations1 = spawn_energy_curve_calcs(
            struct1,
            struct2,
            distortions=self.distortions,
            static_maker=self.static_maker,
            prev_dir=dir1,
            add_name="q1",
            add_info=add_info1,
        )
        # Same call with the roles of the two structures swapped.
        deformations2 = spawn_energy_curve_calcs(
            struct2,
            struct1,
            distortions=self.distortions,
            static_maker=self.static_maker,
            prev_dir=dir2,
            add_name="q2",
            add_info=add_info2,
        )
        deformations1.append_name(" q1")
        deformations2.append_name(" q2")

        # distortion index with smallest absolute value
        min_abs_index = min(
            range(len(self.distortions)), key=lambda i: abs(self.distortions[i])
        )

        ccd_job = get_ccd_documents(
            deformations1.output, deformations2.output, undistorted_index=min_abs_index
        )

        return deformations1, deformations2, ccd_job
[docs]
@dataclass
class FormationEnergyMaker(Maker, ABC):
    """Maker class to help calculate the formation energy diagram.

    Maker class to calculate formation energy diagrams. The main setting for
    this maker is the `defect_relax_maker` which contains the settings for the atomic
    relaxations that each defect supercell will undergo.

    This maker can be used as a stand-alone maker to calculate all of the data
    needed to populate the `DefectEntry` object. However, you can also use this
    maker with `uc_bulk` set to True (also set `collect_defect_entry_data` to False
    and `bulk_relax_maker` to None). This will skip the bulk supercell calculations
    assuming that bulk unit cell calculations are of high enough quality to be used
    directly. In these cases, the bulk SC electrostatic potentials need to be
    constructed without running a separate bulk SC calculation. This is currently
    implemented through the grid re-sampling tools in `mp-pyrho`.

    Attributes
    ----------
    defect_relax_maker: Maker
        A maker to perform an atomic-position-only relaxation on the defect charge
        states. Since these calculations are expensive and the settings might get
        messy, it is recommended for each implementation of this maker to check
        some of the most important settings in the `relax_maker`. Please see
        `FormationEnergyMaker.validate_maker` for more details.
    bulk_relax_maker: Maker
        If None, the same `defect_relax_maker` will be used for the bulk supercell.
        A maker used to perform the bulk supercell calculation. For marginally
        converged calculations, it might be desirable to perform an additional
        lattice relaxation on the bulk supercell to make sure the energies are more
        reliable. However, if you do relax the bulk supercell, you can inadvertently
        change the grid size used in the calculation and thus the representation
        of the electrostatic potential which will affect calculation of the Freysoldt
        finite-size correction. Therefore, if you do want to perform a bulk supercell
        lattice relaxation, you should manually set the grid size.

        .. code-block:: python

            relax_set = MPRelaxSet(defect.get_supercell_structure())
            ng, ngf = relax_set.calculate_ng()
            params = ["NGX", "NGY", "NGZ", "NGXF", "NGYF", "NGZF"]
            ng_settings = dict(zip(params, ng + ngf))
            relax_maker = update_user_incar_settings(relax_maker, ng_settings)

    uc_bulk: bool
        If True, skip the bulk supercell calculation and only perform the defect
        supercell calculations. This is useful for large-scale defect databases.
    name: str
        The name of the flow created by this maker.
    relax_radius:
        The radius to include around the defect site for the relaxation.
        If "auto", the radius will be set to the maximum that will fit inside
        a periodic cell. If None, all atoms will be relaxed.
    perturb:
        The amount to perturb the sites in the supercell. Only perturb the
        sites with selective dynamics set to True. So this setting only works
        with `relax_radius`.
    validate_charge: bool
        Whether to validate the charge of the defect. If True (default), the charge
        of the output structure will have to match the charge of the input defect.
        This helps catch situations where the charge of the output defect is either
        improperly set or improperly parsed before the data is stored in the
        database.
    collect_defect_entry_data: bool
        Whether to collect the defect entry data at the end of the flow.
        If True, the output of all the charge states for each symmetry distinct
        defect will be collected into a list of dictionaries that can be used
        to create a DefectEntry. The data here can be trivially combined with
        phase diagram data from the materials project API to create the formation
        energy diagrams.

        .. note::
            Once we remove the requirement for explicit bulk supercell calculations,
            this setting will be removed. It is only needed because the bulk supercell
            locpot is currently needed for the finite-size correction calculation.

        Output format for the DefectEntry data:

        .. code-block:: python

            [
                {
                    "bulk_dir_name": "computer1:/folder1",
                    "bulk_locpot": {...},
                    "bulk_uuid": "48fb6da7-dc2b-4dcb-b1c8-1203c0f72ce3",
                    "defect_dir_name": "computer1:/folder2",
                    "defect_entry": {...},
                    "defect_locpot": {...},
                    "defect_uuid": "e9af2725-d63c-49b8-a01f-391540211750",
                },
                {
                    "bulk_dir_name": "computer1:/folder3",
                    "bulk_locpot": {...},
                    "bulk_uuid": "48fb6da7-dc2b-4dcb-b1c8-1203c0f72ce3",
                    "defect_dir_name": "computer1:/folder4",
                    "defect_entry": {...},
                    "defect_locpot": {...},
                    "defect_uuid": "a1c31095-0494-4eed-9862-95311f80a993",
                },
            ]
    """

    defect_relax_maker: Maker
    bulk_relax_maker: Maker | None = None
    uc_bulk: bool = False
    name: str = "formation energy"
    relax_radius: float | str | None = None
    perturb: float | None = None
    validate_charge: bool = True
    collect_defect_entry_data: bool = False

    def __post_init__(self) -> None:
        """Apply post init updates.

        Runs `validate_maker` and then checks that the bulk-related settings are
        consistent with `uc_bulk`.

        Raises
        ------
        ValueError
            If `uc_bulk` is True while `bulk_relax_maker` is not None or
            `collect_defect_entry_data` is True.
        """
        self.validate_maker()
        if self.uc_bulk:
            if self.bulk_relax_maker is not None:
                raise ValueError("bulk_relax_maker should be None when uc_bulk is True")
            if self.collect_defect_entry_data:
                raise ValueError(
                    "collect_defect_entry_data should be False when uc_bulk is True"
                )
        else:
            # Default the bulk maker to the defect maker when none was supplied.
            self.bulk_relax_maker = self.bulk_relax_maker or self.defect_relax_maker

    def make(
        self,
        defect: Defect,
        bulk_supercell_dir: str | Path | None = None,
        supercell_matrix: npt.NDArray | None = None,
        defect_index: int | str = "",
    ) -> Flow:
        """Make a flow to calculate the formation energy diagram.

        Start a series of charged supercell relaxations from a single defect
        structure.

        Parameters
        ----------
        defect: Defect
            A `Defect` object representing the Defect we are calculating the
            formation energy diagram for.
        bulk_supercell_dir: str | Path | None
            If provided, the bulk supercell calculation will be skipped.
        supercell_matrix: NDArray | None
            The supercell transformation matrix. If None, the supercell matrix
            will be computed automatically. If `bulk_supercell_dir` is provided,
            this parameter will be ignored.
        defect_index : int | str
            Additional index to give unique names to the defect calculations.
            Useful for external bookkeeping of symmetry distinct defects.

        Returns
        -------
        flow: Flow
            The workflow to calculate the formation energy diagram.

        Raises
        ------
        ValueError
            If `bulk_supercell_dir` is given while `uc_bulk` is True.
        """
        jobs = []
        if not self.uc_bulk:
            if bulk_supercell_dir is None:
                # No previous bulk calculation: run the bulk supercell here.
                get_sc_job = bulk_supercell_calculation(
                    uc_structure=defect.structure,
                    relax_maker=self.bulk_relax_maker,
                    sc_mat=supercell_matrix,
                    get_planar_locpot=self.get_planar_locpot,
                )
                sc_mat = get_sc_job.output["sc_mat"]
                lattice = get_sc_job.output["sc_struct"].lattice
                bulk_supercell_dir = get_sc_job.output["dir_name"]
                sc_uuid = get_sc_job.output["uuid"]
            else:
                # all additional reader functions need to be in this job
                # b/c they might receive Response objects instead of data.
                get_sc_job = get_supercell_from_prv_calc(
                    uc_structure=defect.structure,
                    prv_calc_dir=bulk_supercell_dir,
                    sc_entry_and_locpot_from_prv=self.sc_entry_and_locpot_from_prv,
                    sc_mat_ref=supercell_matrix,
                )
                sc_mat = get_sc_job.output["sc_mat"]
                lattice = get_sc_job.output["lattice"]
                sc_uuid = get_sc_job.output["uuid"]
            jobs.append(get_sc_job)
        else:
            if bulk_supercell_dir is not None:
                raise ValueError(
                    "bulk_supercell_dir should be None when uc_bulk is True. "
                    "We will be using a uc bulk calculation, so no bulk supercell "
                    "is needed."
                )
            # No bulk supercell job exists, so there is no relaxed lattice or uuid
            # to forward to the defect calculations.
            sc_mat = supercell_matrix
            lattice = None
            sc_uuid = None

        spawn_output = spawn_defect_q_jobs(
            defect=defect,
            sc_mat=sc_mat,
            relax_maker=self.defect_relax_maker,
            relaxed_sc_lattice=lattice,
            defect_index=defect_index,
            add_info={
                "bulk_supercell_dir": bulk_supercell_dir,
                "bulk_supercell_matrix": sc_mat,
                "bulk_supercell_uuid": sc_uuid,
            },
            relax_radius=self.relax_radius,
            perturb=self.perturb,
            validate_charge=self.validate_charge,
        )

        if self.uc_bulk:
            # run the function here so we can get the charge state
            # calculations ASAP
            response = spawn_output.function(
                *spawn_output.function_args, **spawn_output.function_kwargs
            )
            jobs.append(response.replace)
            output_ = response.output
        else:
            # execute this as job so you can string a single bulk sc with multiple
            # defect scs
            jobs.append(spawn_output)
            output_ = spawn_output.output

        if self.collect_defect_entry_data:
            # `__post_init__` rejects this flag together with `uc_bulk`, so
            # `get_sc_job` is expected to exist whenever this branch runs.
            collection_job = get_defect_entry(
                charge_state_summary=spawn_output.output,
                bulk_summary=get_sc_job.output,
            )
            jobs.append(collection_job)

        return Flow(
            jobs=jobs,
            output=output_,
            name=self.name,
        )

    @abstractmethod
    def sc_entry_and_locpot_from_prv(
        self, previous_dir: str
    ) -> tuple[ComputedStructureEntry, dict]:
        """Copy the output ComputedStructureEntry and Locpot from previous directory.

        Parameters
        ----------
        previous_dir: str
            The directory to copy from.

        Returns
        -------
        entry: ComputedStructureEntry
        locpot: dict
            Presumably the Locpot data paired with the entry; the exact contents
            are defined by the concrete implementation (confirm there).
        """

    @abstractmethod
    def get_planar_locpot(self, task_doc: TaskDoc) -> dict:
        """Get the Planar Locpot from the TaskDoc.

        This is needed just in case the planar average locpot is stored in different
        part of the TaskDoc for different codes.

        Parameters
        ----------
        task_doc: TaskDoc
            The task document.

        Returns
        -------
        planar_locpot: dict
            The planar average locpot.
        """

    @abstractmethod
    def validate_maker(self) -> None:
        """Check some key settings in the relax maker.

        Since this workflow is pretty complex but allows you to use any
        relax maker, it can be easy to make mistakes in the settings.
        This method should check the most important settings and raise
        an error if something is wrong.

        Example: For VASP, the relax maker should have:
            `ISIF = 2` and `use_structure_charge = True`
        """