Source code for mdapy.nep

# Copyright (c) 2022-2026, Yongchao Wu in Aalto University
# This file is from the mdapy project, released under the BSD 3-Clause License.

from mdapy import _nepcal
from mdapy.box import Box
from mdapy.parallel import get_num_threads
from typing import Tuple
import numpy as np
import polars as pl
import os
from mdapy.calculator import CalculatorMP



[docs]
class NEP(CalculatorMP):
    """
    NEP calculator for mdapy static calculation framework.

    This class provides an interface to compute atomic properties using a
    pre-trained NEP (Neuroevolution Potential) machine learning model. It
    can calculate energies, forces, stresses, virials, descriptors, and
    latent space representations.

    Parameters
    ----------
    filename : str
        Path to the NEP model file (typically 'nep.txt' or similar)

    Attributes
    ----------
    calc : nepcal.NEPCalculator
        Underlying C++ NEP calculator object (NEP_CPU: https://github.com/brucefan1983/NEP_CPU)
    results : dict
        Dictionary storing computed results with keys:

        - 'energies': per-atom potential energies
        - 'forces': per-atom forces (N×3 array)
        - 'virials': per-atom virials (N×9 array)
        - 'stress': system stress tensor (6-component Voigt notation)

    Raises
    ------
    FileNotFoundError
        If the specified model file does not exist

    Examples
    --------
    >>> from mdapy import build_crystal
    >>> from mdapy.nep import NEP
    >>> # Load NEP model
    >>> calc = NEP("nep.txt")
    >>> system = build_crystal("Cu", "fcc", 3.615)
    >>> system.calc = calc
    >>> # Calculate properties
    >>> energy = system.get_energies()  # per-atom potential energy
    >>> forces = system.get_forces()  # per-atom force
    >>> virial = system.get_virials()  # per-atom virials
    >>> stress = system.get_stress()  # system stress
    >>> total_energy = system.get_energy()  # system potential energy
    >>> descriptor = calc.get_descriptor(system.data, system.box)  # descriptor
    >>> laten_space = calc.get_latentspace(system.data, system.box)  # latentspace
    """

    def __init__(self, filename: str):
        """
        Initialize NEP calculator with a trained model.

        Parameters
        ----------
        filename : str
            Path to the NEP model file

        Raises
        ------
        FileNotFoundError
            If the model file does not exist
        """
        if not os.path.exists(filename):
            raise FileNotFoundError(f"{filename} does not exist.")
        self.calc = _nepcal.NEPCalculator(filename)
        assert self.calc.info["model_type"] == 0, "Only support energy NEP model."
        self._is_qnep = False
        if self.calc.info["charge_mode"] > 0:
            self._is_qnep = True

        self.rc = max(self.calc.info["radial_cutoff"], self.calc.info["angular_cutoff"])
        # Initialize results dictionary
        self.results = {}


[docs]
    def setAtoms(
        self, data: pl.DataFrame, box: Box
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """
        Prepare atomic configuration data for NEP calculator.

        This method converts the mdapy data format to the format required
        by the underlying NEP calculator, including type mapping and
        coordinate extraction.

        Parameters
        ----------
        data : pl.DataFrame
            DataFrame containing atomic information with columns:

            - 'x', 'y', 'z': atomic coordinates
            - 'element': chemical symbols (e.g., 'Cu', 'Au')
        box : Box
            Simulation box object

        Returns
        -------
        type_list : np.ndarray
            Integer array mapping each atom to its type index
        x : np.ndarray
            X-coordinates of all atoms
        y : np.ndarray
            Y-coordinates of all atoms
        z : np.ndarray
            Z-coordinates of all atoms
        box_array : np.ndarray
            Box matrix (3×3 array)

        Raises
        ------
        AssertionError
            If required columns are missing or if elements are not in the model

        Notes
        -----
        The NEP model must have been trained with all elements present in the data.
        """
        # Validate that required columns exist
        for i in ["x", "y", "z", "element"]:
            assert i in data.columns, f"data must contain {i} property."

        # Check that all elements in data are supported by the NEP model
        for i in data["element"].unique():
            assert i in self.calc.info["element_list"], (
                f"NEP model did not include {i}."
            )

        # Create mapping from element symbols to type indices
        element2type = {j: i for i, j in enumerate(self.calc.info["element_list"])}

        # Convert element symbols to integer type indices
        type_list = data.with_columns(
            pl.col("element").replace_strict(element2type).rechunk().alias("type")
        )["type"].to_numpy(allow_copy=False)

        # Extract coordinates as separate arrays (required by NEP calculator)
        new_box = box.box.copy()
        for i, j in enumerate(box.boundary):
            if j == 0:
                new_box[i, i] += 3 * self.rc

        return (
            type_list,
            data["x"].to_numpy(allow_copy=False),
            data["y"].to_numpy(allow_copy=False),
            data["z"].to_numpy(allow_copy=False),
            new_box,  # 3×3 box matrix
        )



[docs]
    def calculate(self, data: pl.DataFrame, box: Box):
        """
        Calculate energies, forces, virials, and stress for the system.

        This method performs the main NEP calculation and stores results
        in the `self.results` dictionary. It's automatically called by
        other getter methods if results are not already cached.

        Parameters
        ----------
        data : pl.DataFrame
            DataFrame containing atomic positions and elements
        box : Box
            Simulation box object

        Notes
        -----
        Results are stored in `self.results` with keys:

        - 'energies': per-atom potential energies (N,)
        - 'forces': per-atom forces (N, 3)
        - 'virials': per-atom virials (N, 9)
        - 'stress': system stress tensor (6,) in Voigt notation
        - 'charge' : per-atom charge (N,), only available for qNEP
        - 'bec' : per-atom bec (N, 9), only available for qNEP

        The stress tensor is computed from virials as:
        σ = -(W + W^T) / (2V) where W is the total virial and V is volume.
        """
        N = data.shape[0]  # Number of atoms

        # Allocate arrays for outputs
        potential = np.zeros(N, float)  # Per-atom energies
        force = np.zeros((N, 3), float)  # Per-atom forces [fx, fy, fz]
        virial = np.zeros((N, 9), float)  # Per-atom virials (9 components)
        if self._is_qnep:
            charge = np.zeros(N, float)  # Per-atom charges
            bec = np.zeros((N, 9), float)  # Per-atom bec (9 components)

        # Call the C++NEP calculator
        if self._is_qnep:
            self.calc.calculate_charge(
                *self.setAtoms(data, box), potential, force, virial, charge, bec, get_num_threads()
            )
        else:
            self.calc.calculate(*self.setAtoms(data, box), potential, force, virial, get_num_threads())

        # Store results
        self.results["energies"] = potential
        self.results["forces"] = force
        self.results["virials"] = virial
        if self._is_qnep:
            self.results["charges"] = charge
            self.results["bec"] = bec

        # Calculate stress tensor from virials
        v = virial.sum(axis=0)  # Sum virials over all atoms
        # Reshape to 3×3 matrix: v_xx, v_xy, v_xz, v_yx, v_yy, v_yz, v_zx, v_zy, v_zz
        v = v.reshape(3, 3)
        # Stress = -(virial + virial^T) / (2 * volume)
        stress = (-0.5 * (v + v.T) / box.volume).ravel()
        # Convert to Voigt notation: [σ_xx, σ_yy, σ_zz, σ_yz, σ_xz, σ_xy]
        stress = stress[[0, 4, 8, 5, 2, 1]]
        self.results["stress"] = stress



[docs]
    def get_energies(self, data: pl.DataFrame, box: Box) -> np.ndarray:
        """
        Get per-atom potential energies.

        Parameters
        ----------
        data : pl.DataFrame
            Atomic configuration data
        box : Box
            Simulation box

        Returns
        -------
        np.ndarray
            Array of shape (N,) containing energy for each atom
        """
        if "energies" not in self.results.keys():
            self.calculate(data, box)
        return self.results["energies"]



[docs]
    def get_energy(self, data: pl.DataFrame, box: Box) -> float:
        """
        Get total potential energy of the system.

        Parameters
        ----------
        data : pl.DataFrame
            Atomic configuration data
        box : Box
            Simulation box

        Returns
        -------
        float
            Total potential energy (sum of per-atom energies)
        """
        return self.get_energies(data, box).sum()



[docs]
    def get_virials(self, data: pl.DataFrame, box: Box) -> np.ndarray:
        """
        Get per-atom virial tensors.

        Parameters
        ----------
        data : pl.DataFrame
            Atomic configuration data
        box : Box
            Simulation box

        Returns
        -------
        np.ndarray
            Array of shape (N, 9) with virial components for each atom
            Ordered as: [v_xx, v_xy, v_xz, v_yx, v_yy, v_yz, v_zx, v_zy, v_zz]
        """
        if "virials" not in self.results.keys():
            self.calculate(data, box)
        return self.results["virials"]



[docs]
    def get_forces(self, data: pl.DataFrame, box: Box) -> np.ndarray:
        """
        Get forces acting on each atom.

        Parameters
        ----------
        data : pl.DataFrame
            Atomic configuration data
        box : Box
            Simulation box

        Returns
        -------
        np.ndarray
            Array of shape (N, 3) containing force components [fx, fy, fz]
        """
        if "forces" not in self.results.keys():
            self.calculate(data, box)
        return self.results["forces"]



[docs]
    def get_stress(self, data: pl.DataFrame, box: Box) -> np.ndarray:
        """
        Get stress tensor of the system.

        Parameters
        ----------
        data : pl.DataFrame
            Atomic configuration data
        box : Box
            Simulation box

        Returns
        -------
        np.ndarray
            Stress tensor in Voigt notation: [σ_xx, σ_yy, σ_zz, σ_yz, σ_xz, σ_xy]
        """
        if "stress" not in self.results.keys():
            self.calculate(data, box)
        return self.results["stress"]



[docs]
    def get_charges(self, data: pl.DataFrame, box: Box) -> np.ndarray:
        """
        Get per-atom charges for qNEP.

        Parameters
        ----------
        data : pl.DataFrame
            Atomic configuration data
        box : Box
            Simulation box

        Returns
        -------
        np.ndarray
            Array of shape (N,) containing charge for each atom
        """
        if self._is_qnep:
            if "charges" not in self.results.keys():
                self.calculate(data, box)
        else:
            raise ValueError("Charges is only available for qNEP.")
        return self.results["charges"]



[docs]
    def get_bec(self, data: pl.DataFrame, box: Box) -> np.ndarray:
        """
        Get per-atom bec for qNEP.

        Parameters
        ----------
        data : pl.DataFrame
            Atomic configuration data
        box : Box
            Simulation box

        Returns
        -------
        np.ndarray
            Array of shape (N, 9) with bec components for each atom
            Ordered as: [bec_xx, bec_xy, bec_xz, bec_yx, bec_yy, bec_yz, bec_zx, bec_zy, bec_zz]
        """
        if self._is_qnep:
            if "bec" not in self.results.keys():
                self.calculate(data, box)
        else:
            raise ValueError("bec is only available for qNEP.")
        return self.results["bec"]



[docs]
    def get_descriptor(self, data: pl.DataFrame, box: Box) -> np.ndarray:
        """
        Get atomic descriptors from the NEP model.

        Descriptors are the learned feature representations in the hidden
        layers of the neural network, before the final energy prediction.

        Parameters
        ----------
        data : pl.DataFrame
            Atomic configuration data
        box : Box
            Simulation box

        Returns
        -------
        np.ndarray
            Array of shape (N, num_ndim) containing descriptor vectors
            for each atom, where num_ndim is the descriptor dimension

        Notes
        -----
        Descriptors can be useful for analyzing structural similarities.
        """
        N = data.shape[0]
        descriptor = np.zeros((N, self.calc.info["num_ndim"]), float)
        self.calc.get_descriptors(*self.setAtoms(data, box), descriptor, get_num_threads())
        return descriptor



[docs]
    def get_latentspace(self, data: pl.DataFrame, box: Box) -> np.ndarray:
        """
        Get latent space representations from the NEP model.

        The latent space is the representation in an intermediate layer
        of the neural network, capturing compressed structural information.

        Parameters
        ----------
        data : pl.DataFrame
            Atomic configuration data
        box : Box
            Simulation box

        Returns
        -------
        np.ndarray
            Array of shape (N, num_nlatent) containing latent vectors
            for each atom, where num_nlatent is the latent space dimension

        """
        N = data.shape[0]
        latentspace = np.zeros((N, self.calc.info["num_nlatent"]), float)
        self.calc.get_latentspace(*self.setAtoms(data, box), latentspace, get_num_threads())
        return latentspace




if __name__ == "__main__":
    pass