autoatml · naik-aakash · Dec 8, 2024 · Dec 8, 2024 · Dec 8, 2024 · Dec 8, 2024
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -1,6 +1,7 @@
 {
   "name": "DeveloperEnv",
   "image": "ghcr.io/autoatml/autoplex/autoplex-python-3.10:0.1.1",
+  "initializeCommand": "docker system prune --all --force",
   "hostRequirements": {
    "cpus": 4,
    "memory": "16gb",

diff --git a/Dockerfile b/Dockerfile
@@ -7,6 +7,7 @@ FROM mambaorg/micromamba:1.5.10
 # Set environment variables for micromamba
 ENV MAMBA_DOCKERFILE_ACTIVATE=1
 ENV MAMBA_ROOT_PREFIX=/opt/conda
+ENV MAMBA_NO_LOW_SPEED_LIMIT=1
 
 # Switch to root to install all dependencies (using non-root user causes permission issues)
 USER root
@@ -31,20 +32,14 @@ RUN apt-get update && apt-get install -y \
     git \
     && rm -rf /var/lib/apt/lists/*
 
-# Install Python
+# Install Python and clean up the micromamba package caches
 RUN micromamba install -y -n base -c conda-forge \ python=${PYTHON_VERSION} && \
     micromamba clean --all --yes
 
-# Install testing dependencies
-RUN python -m pip install --no-cache-dir --upgrade pip \
-    && pip install --no-cache-dir uv \
-    && uv pip install pre-commit pytest pytest-mock pytest-split pytest-cov types-setuptools
-
 # Install Julia
 RUN curl -fsSL https://julialang-s3.julialang.org/bin/linux/x64/1.9/julia-1.9.2-linux-x86_64.tar.gz | tar -xz -C /opt \
     && ln -s /opt/julia-1.9.2/bin/julia /usr/local/bin/julia
 
-
 # Set up Julia environment (ACEpotentials.jl interface)
 RUN julia -e 'using Pkg; Pkg.Registry.add("General"); Pkg.Registry.add(Pkg.Registry.RegistrySpec(url="https://github.com/ACEsuit/ACEregistry")); Pkg.add(Pkg.PackageSpec(;name="ACEpotentials", version="0.6.7")); Pkg.add("DataFrames"); Pkg.add("CSV")'
 
@@ -74,15 +69,18 @@ RUN curl -fsSL https://download.lammps.org/tars/lammps-29Aug2024_update1.tar.gz
      && make install-python \
      && cmake --build . --target clean
 
-# Add LAMMPS to PATH and Shared LAMMPS library to LD_LIBRARY_PATH
+# Add LAMMPS to PATH and add the LAMMPS and conda lib directories to LD_LIBRARY_PATH
 ENV PATH="${PATH}:/root/.local/bin"
-ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/root/.local/lib"
+ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/root/.local/lib:/opt/conda/lib"
 
 # Set the working directory
 WORKDIR /workspace
 
 # Copy the current directory contents into the container at /workspace
 COPY . /workspace
 
-# Install autoplex and clear cache
-RUN uv pip install --prerelease=allow .[strict,docs] && uv cache clean && rm -rf /tmp/*
+# Install autoplex, testing dependencies and clear cache
+RUN python -m pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir uv \
+    && uv pip install pre-commit pytest pytest-mock pytest-split pytest-cov types-setuptools \
+    && uv pip install --prerelease=allow .[strict,docs] && uv cache clean && rm -rf /tmp/*
diff --git a/pyproject.toml b/pyproject.toml
@@ -26,6 +26,7 @@ dependencies = [
      "pymatgen>=2024.9.17.1",
      "atomate2[strict]>=0.0.18",
      "ase==3.23.0",
+     "calorine>=3.0",
      "matgl==1.1.3",
      "mace-torch==0.3.9",
      "numpy==1.26.4",
@@ -65,6 +66,7 @@ docs = [
 strict = [
      "pymatgen==2024.10.3", #?
      "atomate2[strict]==0.0.18",
+     "calorine==3.0",
      "matgl==1.1.3",
      "quippy-ase==0.9.14; python_version < '3.12'",
      "torch==2.2.1",

diff --git a/src/autoplex/auto/phonons/jobs.py b/src/autoplex/auto/phonons/jobs.py
@@ -294,6 +294,8 @@ def complete_benchmark(  # this function was put here to prevent circular import
                 path
             )  # M3GNet requires path and fit already returns the path
             # also need to find a different solution for separated fit then (name to path could be modified)
+        elif ml_model in ["NEP"]:
+            ml_potential = Path(path) / "nep.txt"
         elif ml_model in ["NEQUIP"]:
             ml_potential = Path(path) / "deployed_nequip_model.pth"
         else:  # MACE

diff --git a/src/autoplex/auto/rss/jobs.py b/src/autoplex/auto/rss/jobs.py
@@ -1,6 +1,7 @@
 """RSS Jobs include the generation of the initial potential model as well as iterative RSS exploration."""
 
 import logging
+from typing import Literal
 
 from jobflow import Flow, Response, job
 
@@ -54,7 +55,7 @@ def initial_rss(
     force_max: float | None = None,
     force_label: str = "REF_forces",
     pre_database_dir: str | None = None,
-    mlip_type: str = "GAP",
+    mlip_type: Literal["GAP", "J-ACE", "NEP", "NEQUIP", "M3GNET", "MACE"] = "GAP",
     ref_energy_name: str = "REF_energy",
     ref_force_name: str = "REF_forces",
     ref_virial_name: str = "REF_virial",
@@ -137,9 +138,8 @@ def initial_rss(
         The label of force values to use for distillation. Default is 'REF_forces'.
     pre_database_dir: str | None
         Directory where the previous database was saved. Default is None.
-    mlip_type: str
-        Choose one specific MLIP type to be fitted: 'GAP' | 'J-ACE' | 'NEQUIP' | 'M3GNET' | 'MACE'.
-        Default is 'GAP'.
+    mlip_type: Literal["GAP", "J-ACE", "NEP", "NEQUIP", "M3GNET", "MACE"]
+        Choose one specific MLIP type to be fitted. Default is 'GAP'.
     ref_energy_name: str
         Reference energy name. Default is 'REF_energy'.
     ref_force_name: str
@@ -286,7 +286,7 @@ def do_rss_iterations(
     distillation: bool = True,
     force_max: float = 200,
     force_label: str = "REF_forces",
-    mlip_type: str = "GAP",
+    mlip_type: Literal["GAP", "J-ACE", "NEP", "NEQUIP", "M3GNET", "MACE"] = "GAP",
     ref_energy_name: str = "REF_energy",
     ref_force_name: str = "REF_forces",
     ref_virial_name: str = "REF_virial",
@@ -409,8 +409,8 @@ def do_rss_iterations(
         Maximum force value to exclude structures. Default is 200.
     force_label: str
         The label of force values to use for distillation. Default is 'REF_forces'.
-    mlip_type: str
-        Choose one specific MLIP type: 'GAP' | 'J-ACE' | 'NequIP' | 'M3GNet' | 'MACE'. Default is 'GAP'.
+    mlip_type: Literal["GAP", "J-ACE", "NEP", "NEQUIP", "M3GNET", "MACE"]
+        Choose one specific MLIP type to be fitted. Default is 'GAP'.
     ref_energy_name: str
         Reference energy name. Default is 'REF_energy'.
     ref_force_name: str

diff --git a/src/autoplex/data/common/jobs.py b/src/autoplex/data/common/jobs.py
@@ -702,6 +702,7 @@ def preprocess_data(
     distillation: bool = False,
     force_max: float = 40,
     force_label: str = "REF_forces",
+    energy_label: str = "REF_energy",
     pre_database_dir: str | None = None,
     reg_minmax: list[tuple] | None = None,
     isolated_atom_energies: dict | None = None,
@@ -733,6 +734,8 @@ def preprocess_data(
         Maximum force value to exclude structures.
     force_label: str
         The label of force values to use for distillation.
+    energy_label: str
+        The label of energy values used for the stratified train/test split.
     pre_database_dir : str
         Directory where the previous database was saved.
     reg_minmax: list[tuple]
@@ -755,7 +758,9 @@ def preprocess_data(
     if test_ratio == 0 or test_ratio is None:
         train_structures, test_structures = atoms, atoms
     else:
-        train_structures, test_structures = stratified_dataset_split(atoms, test_ratio)
+        train_structures, test_structures = stratified_dataset_split(
+            atoms, test_ratio, energy_label
+        )
 
     if pre_database_dir and os.path.exists(pre_database_dir):
         files_to_copy = ["train.extxyz", "test.extxyz"]

diff --git a/src/autoplex/data/common/utils.py b/src/autoplex/data/common/utils.py
@@ -1485,7 +1485,9 @@ def data_distillation(
     return atoms_distilled
 
 
-def stratified_dataset_split(atoms: Atoms, split_ratio: float) -> tuple[
+def stratified_dataset_split(
+    atoms: Atoms, split_ratio: float, energy_label: str
+) -> tuple[
     list[Atom | Atoms]
     | list[Atom | Atoms | list[Atom | Atoms] | list[Atom | Atoms | list]],
     list[Atom | Atoms | list[Atom | Atoms] | list[Atom | Atoms | list]],
@@ -1499,6 +1501,8 @@ def stratified_dataset_split(atoms: Atoms, split_ratio: float) -> tuple[
         ASE Atoms object
     split_ratio: float
         Parameter to divide the training set and the test set.
+    energy_label: str
+        The label for the energy property in the atoms.
 
     Returns
     -------
@@ -1520,7 +1524,15 @@ def stratified_dataset_split(atoms: Atoms, split_ratio: float) -> tuple[
     if len(atoms) != len(atom_bulk):
         atoms = atom_bulk
 
-    average_energies = np.array([atom.info["REF_energy"] / len(atom) for atom in atoms])
+    # Fall back to the calculator energy if energy_label is missing from atom.info
+    try:
+        average_energies = np.array(
+            [atom.info[energy_label] / len(atom) for atom in atoms]
+        )
+    except KeyError:
+        average_energies = np.array(
+            [atom.get_potential_energy() / len(atom) for atom in atoms]
+        )
     # sort by energy
     sorted_indices = np.argsort(average_energies)
     atoms = [atoms[i] for i in sorted_indices]

diff --git a/src/autoplex/data/phonons/flows.py b/src/autoplex/data/phonons/flows.py
@@ -649,6 +649,27 @@ def make_from_ml_model(
         elif ml_model == "J-ACE":
             raise UserWarning("No atomate2 ACE.jl PhononMaker implemented.")
 
+        elif ml_model == "NEP":
+            if calculator_kwargs is None:
+                calculator_kwargs = {"model_filename": str(potential_file)}
+
+            ml_prep = ml_phonon_maker_preparation(
+                bulk_relax_maker=ForceFieldRelaxMaker(
+                    relax_cell=True,
+                    relax_kwargs={"interval": 500},
+                    force_field_name="NEP",
+                ),
+                phonon_displacement_maker=ForceFieldStaticMaker(
+                    name="nep phonon static",
+                    force_field_name="NEP",
+                ),
+                static_energy_maker=ForceFieldStaticMaker(
+                    force_field_name="NEP",
+                ),
+                calculator_kwargs=calculator_kwargs,
+                relax_maker_kwargs=self.relax_maker_kwargs,
+                static_maker_kwargs=self.static_maker_kwargs,
+            )
         elif ml_model == "NEQUIP":
             if calculator_kwargs is None:
                 calculator_kwargs = {

diff --git a/src/autoplex/data/rss/jobs.py b/src/autoplex/data/rss/jobs.py
@@ -7,6 +7,7 @@
 from pathlib import Path
 from shutil import which
 from subprocess import run
+from typing import Literal
 
 import ase.io
 import numpy as np
@@ -413,7 +414,7 @@ def _parallel_process(
 
 @job
 def do_rss_single_node(
-    mlip_type: str,
+    mlip_type: Literal["GAP", "J-ACE", "NEP", "NEQUIP", "M3GNET", "MACE"],
     mlip_path: str,
     iteration_index: str,
     structures: list[Structure],
@@ -441,9 +442,8 @@ def do_rss_single_node(
 
     Parameters
     ----------
-    mlip_type: str
-        Choose one specific MLIP type:
-        'GAP' | 'J-ACE' | 'NequIP' | 'M3GNet' | 'MACE'.
+    mlip_type: Literal["GAP", "J-ACE", "NEP", "NEQUIP", "M3GNET", "MACE"]
+        Choose one specific MLIP type.
     mlip_path: str
         Path to the MLIP model.
     iteration_index: str
@@ -521,7 +521,7 @@ def do_rss_single_node(
 
 @job
 def do_rss_multi_node(
-    mlip_type: str,
+    mlip_type: Literal["GAP", "J-ACE", "NEP", "NEQUIP", "M3GNET", "MACE"],
     mlip_path: str,
     iteration_index: str,
     structure: list[Structure] | list[list[Structure]] | None = None,
@@ -550,9 +550,8 @@ def do_rss_multi_node(
 
     Parameters
     ----------
-    mlip_type: str
-        Choose one specific MLIP type:
-        'GAP' | 'J-ACE' | 'NequIP' | 'M3GNet' | 'MACE'.
+    mlip_type: Literal["GAP", "J-ACE", "NEP", "NEQUIP", "M3GNET", "MACE"]
+        Choose one specific MLIP type.
     mlip_path: str
         Path to the MLIP model.
     iteration_index: str

diff --git a/src/autoplex/data/rss/utils.py b/src/autoplex/data/rss/utils.py
@@ -553,7 +553,7 @@ def build_traj():
 
 
 def minimize_structures(
-    mlip_type: str,
+    mlip_type: Literal["GAP", "J-ACE", "NEP", "NEQUIP", "M3GNET", "MACE"],
     mlip_path: str,
     iteration_index: str,
     structures: list[Structure],
@@ -580,9 +580,8 @@ def minimize_structures(
 
     Parameters
     ----------
-    mlip_type: str
-        Choose one specific MLIP type:
-        'GAP' | 'J-ACE' | 'NequIP' | 'M3GNet' | 'MACE'.
+    mlip_type: Literal["GAP", "J-ACE", "NEP", "NEQUIP", "M3GNET", "MACE"]
+        Choose one specific MLIP type.
     mlip_path: str
         Path to the MLIP model.
     iteration_index: str