# Copyright (c) 2022-2026, Yongchao Wu in Aalto University
# This file is from the mdapy project, released under the BSD 3-Clause License.
from mdapy import _nepcal
from mdapy.box import Box
from mdapy.parallel import get_num_threads
from typing import Tuple
import numpy as np
import polars as pl
import os
from mdapy.calculator import CalculatorMP
[docs]
class NEP(CalculatorMP):
"""
NEP calculator for mdapy static calculation framework.
This class provides an interface to compute atomic properties using a
pre-trained NEP (Neuroevolution Potential) machine learning model. It
can calculate energies, forces, stresses, virials, descriptors, and
latent space representations.
Parameters
----------
filename : str
Path to the NEP model file (typically 'nep.txt' or similar)
Attributes
----------
calc : nepcal.NEPCalculator
Underlying C++ NEP calculator object (NEP_CPU: https://github.com/brucefan1983/NEP_CPU)
results : dict
Dictionary storing computed results with keys:
- 'energies': per-atom potential energies
- 'forces': per-atom forces (N×3 array)
- 'virials': per-atom virials (N×9 array)
- 'stress': system stress tensor (6-component Voigt notation)
Raises
------
FileNotFoundError
If the specified model file does not exist
Examples
--------
>>> from mdapy import build_crystal
>>> from mdapy.nep import NEP
>>> # Load NEP model
>>> calc = NEP("nep.txt")
>>> system = build_crystal("Cu", "fcc", 3.615)
>>> system.calc = calc
>>> # Calculate properties
>>> energy = system.get_energies() # per-atom potential energy
>>> forces = system.get_forces() # per-atom force
>>> virial = system.get_virials() # per-atom virials
>>> stress = system.get_stress() # system stress
>>> total_energy = system.get_energy() # system potential energy
>>> descriptor = calc.get_descriptor(system.data, system.box) # descriptor
>>> laten_space = calc.get_latentspace(system.data, system.box) # latentspace
"""
def __init__(self, filename: str):
"""
Initialize NEP calculator with a trained model.
Parameters
----------
filename : str
Path to the NEP model file
Raises
------
FileNotFoundError
If the model file does not exist
"""
if not os.path.exists(filename):
raise FileNotFoundError(f"{filename} does not exist.")
self.calc = _nepcal.NEPCalculator(filename)
assert self.calc.info["model_type"] == 0, "Only support energy NEP model."
self._is_qnep = False
if self.calc.info["charge_mode"] > 0:
self._is_qnep = True
self.rc = max(self.calc.info["radial_cutoff"], self.calc.info["angular_cutoff"])
# Initialize results dictionary
self.results = {}
[docs]
def setAtoms(
self, data: pl.DataFrame, box: Box
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""
Prepare atomic configuration data for NEP calculator.
This method converts the mdapy data format to the format required
by the underlying NEP calculator, including type mapping and
coordinate extraction.
Parameters
----------
data : pl.DataFrame
DataFrame containing atomic information with columns:
- 'x', 'y', 'z': atomic coordinates
- 'element': chemical symbols (e.g., 'Cu', 'Au')
box : Box
Simulation box object
Returns
-------
type_list : np.ndarray
Integer array mapping each atom to its type index
x : np.ndarray
X-coordinates of all atoms
y : np.ndarray
Y-coordinates of all atoms
z : np.ndarray
Z-coordinates of all atoms
box_array : np.ndarray
Box matrix (3×3 array)
Raises
------
AssertionError
If required columns are missing or if elements are not in the model
Notes
-----
The NEP model must have been trained with all elements present in the data.
"""
# Validate that required columns exist
for i in ["x", "y", "z", "element"]:
assert i in data.columns, f"data must contain {i} property."
# Check that all elements in data are supported by the NEP model
for i in data["element"].unique():
assert i in self.calc.info["element_list"], (
f"NEP model did not include {i}."
)
# Create mapping from element symbols to type indices
element2type = {j: i for i, j in enumerate(self.calc.info["element_list"])}
# Convert element symbols to integer type indices
type_list = data.with_columns(
pl.col("element").replace_strict(element2type).rechunk().alias("type")
)["type"].to_numpy(allow_copy=False)
# Extract coordinates as separate arrays (required by NEP calculator)
new_box = box.box.copy()
for i, j in enumerate(box.boundary):
if j == 0:
new_box[i, i] += 3 * self.rc
return (
type_list,
data["x"].to_numpy(allow_copy=False),
data["y"].to_numpy(allow_copy=False),
data["z"].to_numpy(allow_copy=False),
new_box, # 3×3 box matrix
)
[docs]
def calculate(self, data: pl.DataFrame, box: Box):
"""
Calculate energies, forces, virials, and stress for the system.
This method performs the main NEP calculation and stores results
in the `self.results` dictionary. It's automatically called by
other getter methods if results are not already cached.
Parameters
----------
data : pl.DataFrame
DataFrame containing atomic positions and elements
box : Box
Simulation box object
Notes
-----
Results are stored in `self.results` with keys:
- 'energies': per-atom potential energies (N,)
- 'forces': per-atom forces (N, 3)
- 'virials': per-atom virials (N, 9)
- 'stress': system stress tensor (6,) in Voigt notation
- 'charge' : per-atom charge (N,), only available for qNEP
- 'bec' : per-atom bec (N, 9), only available for qNEP
The stress tensor is computed from virials as:
σ = -(W + W^T) / (2V) where W is the total virial and V is volume.
"""
N = data.shape[0] # Number of atoms
# Allocate arrays for outputs
potential = np.zeros(N, float) # Per-atom energies
force = np.zeros((N, 3), float) # Per-atom forces [fx, fy, fz]
virial = np.zeros((N, 9), float) # Per-atom virials (9 components)
if self._is_qnep:
charge = np.zeros(N, float) # Per-atom charges
bec = np.zeros((N, 9), float) # Per-atom bec (9 components)
# Call the C++NEP calculator
if self._is_qnep:
self.calc.calculate_charge(
*self.setAtoms(data, box), potential, force, virial, charge, bec, get_num_threads()
)
else:
self.calc.calculate(*self.setAtoms(data, box), potential, force, virial, get_num_threads())
# Store results
self.results["energies"] = potential
self.results["forces"] = force
self.results["virials"] = virial
if self._is_qnep:
self.results["charges"] = charge
self.results["bec"] = bec
# Calculate stress tensor from virials
v = virial.sum(axis=0) # Sum virials over all atoms
# Reshape to 3×3 matrix: v_xx, v_xy, v_xz, v_yx, v_yy, v_yz, v_zx, v_zy, v_zz
v = v.reshape(3, 3)
# Stress = -(virial + virial^T) / (2 * volume)
stress = (-0.5 * (v + v.T) / box.volume).ravel()
# Convert to Voigt notation: [σ_xx, σ_yy, σ_zz, σ_yz, σ_xz, σ_xy]
stress = stress[[0, 4, 8, 5, 2, 1]]
self.results["stress"] = stress
[docs]
def get_energies(self, data: pl.DataFrame, box: Box) -> np.ndarray:
"""
Get per-atom potential energies.
Parameters
----------
data : pl.DataFrame
Atomic configuration data
box : Box
Simulation box
Returns
-------
np.ndarray
Array of shape (N,) containing energy for each atom
"""
if "energies" not in self.results.keys():
self.calculate(data, box)
return self.results["energies"]
[docs]
def get_energy(self, data: pl.DataFrame, box: Box) -> float:
"""
Get total potential energy of the system.
Parameters
----------
data : pl.DataFrame
Atomic configuration data
box : Box
Simulation box
Returns
-------
float
Total potential energy (sum of per-atom energies)
"""
return self.get_energies(data, box).sum()
[docs]
def get_virials(self, data: pl.DataFrame, box: Box) -> np.ndarray:
"""
Get per-atom virial tensors.
Parameters
----------
data : pl.DataFrame
Atomic configuration data
box : Box
Simulation box
Returns
-------
np.ndarray
Array of shape (N, 9) with virial components for each atom
Ordered as: [v_xx, v_xy, v_xz, v_yx, v_yy, v_yz, v_zx, v_zy, v_zz]
"""
if "virials" not in self.results.keys():
self.calculate(data, box)
return self.results["virials"]
[docs]
def get_forces(self, data: pl.DataFrame, box: Box) -> np.ndarray:
"""
Get forces acting on each atom.
Parameters
----------
data : pl.DataFrame
Atomic configuration data
box : Box
Simulation box
Returns
-------
np.ndarray
Array of shape (N, 3) containing force components [fx, fy, fz]
"""
if "forces" not in self.results.keys():
self.calculate(data, box)
return self.results["forces"]
[docs]
def get_stress(self, data: pl.DataFrame, box: Box) -> np.ndarray:
"""
Get stress tensor of the system.
Parameters
----------
data : pl.DataFrame
Atomic configuration data
box : Box
Simulation box
Returns
-------
np.ndarray
Stress tensor in Voigt notation: [σ_xx, σ_yy, σ_zz, σ_yz, σ_xz, σ_xy]
"""
if "stress" not in self.results.keys():
self.calculate(data, box)
return self.results["stress"]
[docs]
def get_charges(self, data: pl.DataFrame, box: Box) -> np.ndarray:
"""
Get per-atom charges for qNEP.
Parameters
----------
data : pl.DataFrame
Atomic configuration data
box : Box
Simulation box
Returns
-------
np.ndarray
Array of shape (N,) containing charge for each atom
"""
if self._is_qnep:
if "charges" not in self.results.keys():
self.calculate(data, box)
else:
raise ValueError("Charges is only available for qNEP.")
return self.results["charges"]
[docs]
def get_bec(self, data: pl.DataFrame, box: Box) -> np.ndarray:
"""
Get per-atom bec for qNEP.
Parameters
----------
data : pl.DataFrame
Atomic configuration data
box : Box
Simulation box
Returns
-------
np.ndarray
Array of shape (N, 9) with bec components for each atom
Ordered as: [bec_xx, bec_xy, bec_xz, bec_yx, bec_yy, bec_yz, bec_zx, bec_zy, bec_zz]
"""
if self._is_qnep:
if "bec" not in self.results.keys():
self.calculate(data, box)
else:
raise ValueError("bec is only available for qNEP.")
return self.results["bec"]
[docs]
def get_descriptor(self, data: pl.DataFrame, box: Box) -> np.ndarray:
"""
Get atomic descriptors from the NEP model.
Descriptors are the learned feature representations in the hidden
layers of the neural network, before the final energy prediction.
Parameters
----------
data : pl.DataFrame
Atomic configuration data
box : Box
Simulation box
Returns
-------
np.ndarray
Array of shape (N, num_ndim) containing descriptor vectors
for each atom, where num_ndim is the descriptor dimension
Notes
-----
Descriptors can be useful for analyzing structural similarities.
"""
N = data.shape[0]
descriptor = np.zeros((N, self.calc.info["num_ndim"]), float)
self.calc.get_descriptors(*self.setAtoms(data, box), descriptor, get_num_threads())
return descriptor
[docs]
def get_latentspace(self, data: pl.DataFrame, box: Box) -> np.ndarray:
"""
Get latent space representations from the NEP model.
The latent space is the representation in an intermediate layer
of the neural network, capturing compressed structural information.
Parameters
----------
data : pl.DataFrame
Atomic configuration data
box : Box
Simulation box
Returns
-------
np.ndarray
Array of shape (N, num_nlatent) containing latent vectors
for each atom, where num_nlatent is the latent space dimension
"""
N = data.shape[0]
latentspace = np.zeros((N, self.calc.info["num_nlatent"]), float)
self.calc.get_latentspace(*self.setAtoms(data, box), latentspace, get_num_threads())
return latentspace
if __name__ == "__main__":
pass