Source code for atomate2.qchem.sets.base

"""Module defining base QChem input set and generator."""

from __future__ import annotations

import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal

from monty.io import zopen
from pymatgen.io.core import InputGenerator, InputSet
from pymatgen.io.qchem.inputs import QCInput
from pymatgen.io.qchem.utils import lower_and_check_unique

if TYPE_CHECKING:
    from pymatgen.core.structure import Molecule

__author__ = "Alex Ganose, Ryan Kingsbury, Rishabh D Guha"
__copyright__ = "Copyright 2018-2022, The Materials Project"
__version__ = "0.1"

# _BASE_QCHEM_SET =
# loadfn(resource_filename("atomate2.qchem.sets", "BaseQchemSet.yaml"))


[docs] class QCInputSet(InputSet): """ A class to represent a QChem input file as a QChem InputSet. Parameters ---------- qcinput A QCInput object optional_files Any other optional input files supplied as a dict of ``{filename: object}``. The objects should follow standard pymatgen conventions in implementing an ``as_dict()`` and ``from_dict()`` method. """ def __init__( self, qcinput: QCInput, optional_files: dict | None = None, ) -> None: self.qcinput = qcinput self.optional_files = {} if optional_files is None else optional_files
[docs] def write_input( self, directory: str | Path, overwrite: bool = True, ) -> None: """Write QChem input file to directory. Parameters ---------- directory Directory to write input files to. overwrite Whether to overwrite an input file if it already exists. """ os.makedirs(directory, exist_ok=True) directory = Path(directory) inputs = {"Input_Dict": self.qcinput} inputs.update(self.optional_files) for key, val in inputs.items(): inp_key = "mol.qin" if key == "Input_Dict" else f"{key}.qin" if val is not None and (overwrite or not (directory / inp_key).exists()): # should this be open instead of zopen? can QChem inputs be gzipped? with zopen(directory / inp_key, "wt") as file: file.write(str(val)) elif not overwrite and (directory / inp_key).exists(): raise FileExistsError(f"{directory / inp_key} already exists.")
[docs] @staticmethod def from_directory( directory: str | Path, optional_files: dict = None ) -> QCInputSet: """Load a set of QChem inputs from a directory. Parameters ---------- directory Directory to read QChem inputs from. optional_files Optional files to read in as well as a dict of {filename: Object class}. Object class must have a static/class method from_file """ directory = Path(directory) objs = {"Input_Dict": QCInput} inputs = {} for name, obj in objs.items(): file_path = directory / ("mol.qin" if name == "Input_Dict" else name) if file_path.exists(): inputs[name.lower()] = obj.from_file(file_path) optional_inputs = {} if optional_files is not None: for name, obj in optional_files.items(): optional_inputs[name] = obj.from_file(directory / name) return QCInputSet(inputs["input_dict"], optional_files=optional_inputs)
# Todo # Implement is_valid property
[docs] @dataclass class QCInputGenerator(InputGenerator): """ A dataclass to generate QChem input set. Parameters ---------- job_type : str QChem job type to run. Valid options are "opt" for optimization, "sp" for single point, "freq" for frequency calculation, or "force" for force evaluation. basis_set : str Basis set to use. For example, "def2-tzvpd". scf_algorithm : str Algorithm to use for converging the SCF. Recommended choices are "DIIS", "GDM", and "DIIS_GDM". Other algorithms supported by Qchem's GEN_SCFMAN module will also likely perform well. Refer to the QChem manual for further details. dft_rung : int Select the DFT functional among 5 recommended levels of theory, in order of increasing accuracy/cost. 1 = SPW92, 2 = B97-D3(BJ), 3 = B97M-V, 4 = ωB97M-V, 5 = ωB97M-(2). (Default: 4) To set a functional not given by one of the above, set the overwrite_inputs argument to {"method":"<NAME OF FUNCTIONAL>"} **Note that the "rungs" in this argument do NOT correspond to rungs on "Jacob's Ladder of Density Functional Approximations"** pcm_dielectric : float Dielectric constant to use for PCM implicit solvation model. (Default: None) smd_solvent : str Solvent to use for SMD implicit solvation model. (Default: None) Examples include "water", "ethanol", "methanol", and "acetonitrile". Refer to the QChem manual for a complete list of solvents available. To define a custom solvent, set this argument to "custom" and populate custom_smd with the necessary parameters. **Note that only one of smd_solvent and pcm_dielectric may be set.** custom_smd : str List of parameters to define a custom solvent in SMD. (Default: None) Must be given as a string of seven comma separated values in the following order: "dielectric, refractive index, acidity, basicity, surface tension, aromaticity, electronegative halogenicity" Refer to the QChem manual for further details. opt_dict : dict A dictionary of opt sections, where each opt section is a key and the corresponding values are a list of strings. Strings must be formatted as instructed by the QChem manual. The different opt sections are: CONSTRAINT, FIXED, DUMMY, and CONNECT. Ex. opt = {"CONSTRAINT": ["tors 2 3 4 5 25.0", "tors 2 5 7 9 80.0"], "FIXED": ["2 XY"]} scan_dict : dict A dictionary of scan variables. Because two constraints of the same type are allowed (for instance, two torsions or two bond stretches), each TYPE of variable (stre, bend, tors) should be its own key in the dict, rather than each variable. Note that the total number of variable (sum of lengths of all lists) CANNOT be more than two. Ex. scan_variables = {"stre": ["3 6 1.5 1.9 0.1"], "tors": ["1 2 3 4 -180 180 15"]} max_scf_cycles : int Maximum number of SCF iterations. (Default: 100) geom_opt_max_cycles : int Maximum number of geometry optimization iterations. (Default: 200) plot_cubes : bool Whether to write CUBE files of the electron density. (Default: False) nbo_params : dict A dict containing the desired NBO params. Note that a key:value pair of "version":7 will trigger NBO7 analysis. Otherwise, NBO5 analysis will be performed, including if an empty dict is passed. Besides a key of "version", all other key:value pairs will be written into the $nbo section of the QChem input file. (Default: False) new_geom_opt : dict A dict containing parameters for the $geom_opt section of the QChem input file, which control the new geometry optimizer available starting in version 5.4.2. Further note that even passing an empty dictionary will trigger the new optimizer. (Default: False) overwrite_inputs : dict Dictionary of QChem input sections to add or overwrite variables. The currently available sections (keys) are rem, pcm, solvent, smx, opt, scan, van_der_waals, and plots. The value of each key is a dictionary of key value pairs relevant to that section. For example, to add a new variable to the rem section that sets symmetry to false, use overwrite_inputs = {"rem": {"symmetry": "false"}} **Note that if something like basis is added to the rem dict it will overwrite the default basis.** **Note that supplying a van_der_waals section here will automatically modify the PCM "radii" setting to "read".** **Note that all keys must be given as strings, even when they are numbers!** vdw_mode : str Method of specifying custom van der Waals radii. Either "atomic" (default) or "sequential". In "atomic" mode, dict keys represent the atomic number associated with each radius (e.g., 12 = carbon). In "sequential" mode, dict keys represent the sequential position of a single specific atom in the input structure. """ job_type: str = field(default=None) basis_set: str = field(default=None) scf_algorithm: str = field(default=None) dft_rung: int = field(default=4) pcm_dielectric: float = field(default=None) smd_solvent: str = field(default=None) custom_smd: str = field(default=None) opt_dict: dict[str, Any] = field(default_factory=dict) scan_dict: dict[str, Any] = field(default_factory=dict) max_scf_cycles: int = field(default=100) geom_opt_max_cycles: int = field(default=200) plot_cubes: bool = field(default=False) nbo_params: dict[str, Any] = field(default_factory=dict) new_geom_opt: dict[str, Any] = field(default_factory=dict) overwrite_inputs: dict[str, str] = field(default_factory=dict) vdw_mode: Literal["atomic", "sequential"] = field(default="atomic") rem_dict: dict[str, Any] = field(default_factory=dict) vdw_dict: dict[str, float] = field(default_factory=dict) pcm_dict: dict[str, Any] = field(default_factory=dict) solv_dict: dict[str, Any] = field(default_factory=dict) smx_dict: dict[str, Any] = field(default_factory=dict) plots_dict: dict[str, Any] = field(default_factory=dict) def __post_init__(self) -> None: """Post init formatting of arguments.""" self.rem_dict = { "job_type": self.job_type, "basis": self.basis_set, "max_scf_cycles": str(self.max_scf_cycles), "gen_scfman": "true", "xc_grid": "3", "thresh": "14", "s2thresh": "16", "scf_algorithm": self.scf_algorithm, "resp_charges": "true", "symmetry": "false", "sym_ignore": "true", } rung_2_func = ["spw92", "b97d3", "b97mv", "wb97mv", "wb97m(2)"] qc_method = {i + 1: e for i, e in enumerate(rung_2_func)} if qc_method.get(self.dft_rung): self.rem_dict["method"] = qc_method.get(self.dft_rung) else: raise ValueError("Provided DFT rung should be between 1 and 5!") if self.dft_rung == 2: self.rem_dict["dft_D"] = "D3_BJ" if self.job_type.lower() in ["opt", "ts", "pes_scan"]: self.rem_dict["geom_opt_max_cycles"] = str(self.geom_opt_max_cycles) if self.pcm_dielectric and self.smd_solvent: raise ValueError( "Only one of pcm or smd may be used as an implicit solvent. Not both!" ) if self.pcm_dielectric: pcm_defaults = { "heavypoints": "194", "hpoints": "194", "radii": "uff", "theory": "cpcm", "vdwscale": "1.1", } self.pcm_dict = pcm_defaults self.solv_dict["dielectric"] = self.pcm_dielectric self.rem_dict["solvent_method"] = "pcm" if self.smd_solvent: if self.smd_solvent == "custom": self.smx_dict["solvent"] = "other" else: self.smx_dict["solvent"] = self.smd_solvent self.rem_dict["solvent_method"] = "smd" self.rem_dict["ideriv"] = "1" if self.smd_solvent in ("custom", "other") and self.custom_smd is None: raise ValueError( "A user-defined SMD requires passing custom_smd," "a string of seven comma separated values in the following order: " "dielectric, refractive index, acidity, basicity, surface tension," "aromaticity, electronegative halogenicity" ) if self.plot_cubes: plots_defaults = {"grid_spacing": "0.05", "total_density": "0"} self.plots_dict = plots_defaults self.rem_dict["plots"] = "true" self.rem_dict["make_cube_files"] = "true" if self.nbo_params: self.rem_dict["nbo"] = "true" nbo_params_copy = self.nbo_params.copy() if "version" in nbo_params_copy: if nbo_params_copy["version"] == 7: self.rem_dict["nbo_external"] = "true" else: raise RuntimeError( "nbo params version should only be set to 7! Exiting..." ) for key in nbo_params_copy: if key == "version": self.nbo_params.pop(key) if self.new_geom_opt: self.rem_dict["geom_opt2"] = "3" if "maxiter" in self.new_geom_opt and self.new_geom_opt["maxiter"] != str( self.geom_opt_max_cycles ): raise RuntimeError( "Max # of optimization cycles must be the same! Exiting..." )
[docs] def get_input_set(self, molecule: Molecule = None) -> QCInputSet: """Get a QChem InputSet for a molecule. Parameters ---------- molecule: Molecule Pymatgen representation of a molecule for which the QCInputSet will be generated Returns ------- QchemInputSet A QChem input set """ if self.overwrite_inputs: for sub, sub_dict in self.overwrite_inputs.items(): if sub == "rem": temp_rem = lower_and_check_unique(sub_dict) for k, v in temp_rem.items(): self.rem_dict[k] = v if sub == "pcm": temp_pcm = lower_and_check_unique(sub_dict) for k, v in temp_pcm.items(): self.pcm_dict[k] = v if sub == "solvent": temp_solvent = lower_and_check_unique(sub_dict) for k, v in temp_solvent.items(): self.solv_dict[k] = v if sub == "smx": temp_smx = lower_and_check_unique(sub_dict) for k, v in temp_smx.items(): self.smx_dict[k] = v if sub == "scan": temp_scan = lower_and_check_unique(sub_dict) for k, v in temp_scan.items(): self.scan_dict[k] = v if sub == "van_der_waals": temp_vdw = lower_and_check_unique(sub_dict) for k, v in temp_vdw.items(): self.vdw_dict[k] = v # set the PCM section to read custom radii self.pcm_dict["radii"] = "read" if sub == "plots": temp_plots = lower_and_check_unique(sub_dict) for k, v in temp_plots.items(): self.plots_dict[k] = v if sub == "nbo": if self.nbo_dict is None: raise RuntimeError( "Can't overwrite nbo params when NBO" "is not being run! Exiting..." ) temp_nbo = lower_and_check_unique(sub_dict) for k, v in temp_nbo.items(): self.nbo_dict[k] = v if sub == "geom_opt": if self.geom_opt_dict is None: raise RuntimeError( "Can't overwrite geom_opt params when" "not using the new optimizer! Exiting..." ) temp_geomopt = lower_and_check_unique(sub_dict) for k, v in temp_geomopt.items(): self.geom_opt_dict[k] = v if sub == "opt": temp_opts = lower_and_check_unique(sub_dict) for k, v in temp_opts.items(): self.opt_dict[k] = v return QCInputSet( qcinput=QCInput( molecule, rem=self.rem_dict, opt=self.opt_dict, pcm=self.pcm_dict, solvent=self.solv_dict, smx=self.smx_dict, scan=self.scan_dict, van_der_waals=self.vdw_dict, vdw_mode=self.vdw_mode, plots=self.plots_dict, nbo=self.nbo_params, geom_opt=self.new_geom_opt, ) )