Source code for atomate2.common.jobs.utils

"""Module defining common jobs."""

from __future__ import annotations

from typing import TYPE_CHECKING

from jobflow import Response, job
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer

from atomate2 import SETTINGS
from atomate2.common.files import delete_files
from atomate2.utils.path import strip_hostname

if TYPE_CHECKING:
    from pymatgen.core import Structure



[docs]
@job
def structure_to_primitive(
    structure: Structure, symprec: float = SETTINGS.SYMPREC
) -> Structure:
    """
    Job that creates a standard primitive structure.

    Parameters
    ----------
    structure: Structure object
        input structure that will be transformed
    symprec: float
        precision to determine symmetry

    Returns
    -------
    .Structure
    """
    sga = SpacegroupAnalyzer(structure, symprec=symprec)
    return sga.get_primitive_standard_structure()




[docs]
@job
def structure_to_conventional(
    structure: Structure, symprec: float = SETTINGS.SYMPREC
) -> Structure:
    """
    Job that creates a standard conventional structure.

    Parameters
    ----------
    structure: Structure object
        input structure that will be transformed
    symprec: float
        precision to determine symmetry

    Returns
    -------
    .Structure
    """
    sga = SpacegroupAnalyzer(structure, symprec=symprec)
    return sga.get_conventional_standard_structure()




[docs]
@job
def retrieve_structure_from_materials_project(
    material_id_or_task_id: str,
    use_task_id: bool = False,
    reset_magnetic_moments: bool = False,
) -> Response[Structure]:
    """
    Retrieve a Structure from Materials Project.

    This job is useful for constructing a Flow that always will retrieve the most
    up-to-date data at the time the Flow runs.

    The retrieved Structure will change between subsequent Materials Project database
    releases as old calculation tasks are removed and new, better, calculation tasks
    (e.g. with more accurate lattice parameters) are added.

    Using this job requires that the system where the job runs has a connection to the
    outside internet. It also requires the Materials Project API key to be set
    appropriately via an environment variable or otherwise. Consult the Materials
    Project API documentation for more information.

    Parameters
    ----------
    material_id_or_task_id : str
        The material_id or the task_id for the data record
        being retrieved.
    use_task_id : bool
        If true, will request the Structure from the specific
        calculation task in Materials Project. Structures
        retrieved in this way should not change even between
        new Materials Project database releases.
    reset_magnetic_moments : bool
        If true, will remove any magnetic moment information
        or magnetic ordering on the Structure. Typically,
        this will mean that the Structure will then be
        initialized as ferromagnetic by any child jobs.

    Returns
    -------
    .Response
        A Response with the Structure object as the output,
        and also the database version and specific task_id
        corresponding to that Structure object also stored
    """
    # inline import to avoid required dependency
    try:
        from mp_api.client import MPRester
    except ImportError:
        raise ImportError(
            "This job requires the Materials Project API client "
            "to be installed, via `pip install mp-api` or similar."
        ) from None

    with MPRester() as mpr:
        if use_task_id:
            doc = mpr.tasks.search(material_id_or_task_id, fields=["structure"])[0]
            task_id = material_id_or_task_id
        else:
            doc = mpr.materials.search(
                material_id_or_task_id, fields=["structure", "origins"]
            )[0]
            origins = {prop.name: prop for prop in doc.origins}
            task_id = str(origins["structure"].task_id)

        database_version = mpr.get_database_version()

    structure = doc.structure

    if reset_magnetic_moments and "magmom" in structure.site_properties:
        # Materials Project stores magnetic moments via the `magmom` site property
        # and we can safely assume that here. In general, since magnetic order
        # can be represented in multiple ways such as Species.spin, the
        # following method would be better:
        # CollinearMagneticStructureAnalyzer.get_nonmagnetic_structure()
        structure.remove_site_property("magmom")

    return Response(
        output=structure,
        stored_data={"task_id": task_id, "database_version": database_version},
    )




[docs]
@job
def remove_workflow_files(
    directories: list[str | list[str]],
    file_names: list[str],
    allow_zpath: bool = True,
    **kwargs,
) -> None:
    """
    Remove files from previous jobs.

    For example, at the end of an MP flow, WAVECAR files are generated
    that take up a lot of disk space.
    This utility can automatically remove them following a workflow.

    Parameters
    ----------
        directories : list of str, or list of list of str
            Names of directories to clean output from.
            Can be a list of directories, or a list of lists.
        file_names : list of str
            The list of file names to remove, ex. ["WAVECAR"] rather than a full path
        allow_zpath : bool = True
            Whether to allow checking for zipped output
        **kwargs
            Other kwargs to pass to `delete_files`

    Returns
    -------
        list[str] : list of removed files
    """
    if allow_zpath:
        orig_files = list(file_names)
        for file in orig_files:
            file_names.extend(
                f"{file.removesuffix(ext)}{ext}"
                for ext in (".gz", ".GZ", ".bz2", ".BZ2", ".z", ".Z")
            )

    flattened_dirs = []
    for dir_name in directories:
        if isinstance(dir_name, list | tuple):
            flattened_dirs.extend(dir_name)
        else:
            flattened_dirs.append(dir_name)

    for dir_name in flattened_dirs:
        delete_files(
            strip_hostname(dir_name),
            include_files=file_names,
            allow_missing=True,
            **kwargs,
        )