Source code for atomate2.qchem.files

"""Functions for manipulating QChem files."""

from __future__ import annotations

import logging
import re
from pathlib import Path
from typing import TYPE_CHECKING

from atomate2.common.files import copy_files, get_zfile, gunzip_files, rename_files
from atomate2.utils.file_client import FileClient, auto_fileclient
from atomate2.utils.path import strip_hostname

if TYPE_CHECKING:
    from collections.abc import Sequence


logger = logging.getLogger(__name__)


@auto_fileclient
def copy_qchem_outputs(
    src_dir: Path | str,
    src_host: str | None = None,
    additional_qchem_files: Sequence[str] = (),
    file_client: FileClient | None = None,
) -> None:
    """
    Copy QChem output files to the current directory.

    When the source folder holds several calculations (files suffixed with
    opt_1, opt_2, etc), only the files carrying the highest numbered suffix
    are copied, and that suffix is stripped from the copied files. Additional
    QChem files are looked up with the same suffix appended. Finally, any
    gzipped files that were copied are gunzipped.

    Parameters
    ----------
    src_dir : str or Path
        The source directory.
    src_host : str or None
        The source hostname used to specify a remote filesystem. Can be given as
        either "username@remote_host" or just "remote_host" in which case the
        username will be inferred from the current user. If ``None``, the local
        filesystem will be used as the source.
    additional_qchem_files : sequence of str
        Additional files to copy.
    file_client : .FileClient
        A file client to use for performing file operations.
    """
    src_dir = strip_hostname(src_dir)  # TODO: Handle hostnames properly.
    logger.info(f"Copying QChem inputs from {src_dir}")

    suffix = get_largest_opt_extension(src_dir, src_host, file_client=file_client)
    listing = file_client.listdir(src_dir, host=src_host)

    # The two standard QChem files always come first, followed by any extras;
    # every name is looked up with the opt suffix appended.
    base_names = ("mol.qin", "mol.qout") + tuple(additional_qchem_files)
    to_copy = [get_zfile(listing, base + suffix) for base in base_names]

    copy_files(
        src_dir,
        src_host=src_host,
        include_files=to_copy,
        file_client=file_client,
    )
    gunzip_files(
        include_files=to_copy,
        allow_missing=True,
        file_client=file_client,
    )

    if suffix:
        # Map each gunzipped name to the same name without the opt suffix.
        rename_map = {}
        for copied in to_copy:
            unzipped_name = copied.name.replace(".gz", "")
            plain_name = copied.name.replace(suffix, "").replace(".gz", "")
            rename_map[unzipped_name] = plain_name
        rename_files(rename_map, allow_missing=True, file_client=file_client)

    logger.info("Finished copying inputs")
@auto_fileclient
def get_largest_opt_extension(
    directory: Path | str,
    host: str | None = None,
    file_client: FileClient | None = None,
) -> str:
    """
    Get the largest numbered opt extension of files in a directory.

    For example, if listdir gives ["mol.qout.opt_0.gz", "mol.qout.opt_1.gz"],
    this function will return ".opt_1".

    Parameters
    ----------
    directory : str or Path
        A directory to search.
    host : str or None
        The hostname used to specify a remote filesystem. Can be given as either
        "username@remote_host" or just "remote_host" in which case the username will be
        inferred from the current user. If ``None``, the local filesystem will be used.
    file_client : .FileClient
        A file client to use for performing file operations.

    Returns
    -------
    str
        The extension with the highest number, or an empty string when no file
        name in the directory carries a numbered ``.opt_N`` suffix.
    """
    candidates = file_client.glob(Path(directory) / "*.opt*", host=host)
    if not candidates:
        return ""

    # Keep the digits as strings so the returned extension reproduces the
    # suffix exactly as it appears in the file names.
    indices = [
        found.group(1)
        for candidate in candidates
        if (found := re.search(r"\.opt_(\d+)", candidate.name))
    ]
    if not indices:
        return ""  # No matches found

    return f".opt_{max(indices, key=int)}"