-
Notifications
You must be signed in to change notification settings - Fork 13
add gromacs to frames node #463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
2f21136
4584b12
c4a2475
2928b08
d8207ed
c259ba8
c695783
88fd54a
67b2ee2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,240 @@ | ||
| import typing | ||
| import warnings | ||
| from pathlib import Path | ||
|
|
||
| import h5py | ||
| import MDAnalysis as mda | ||
| import numpy as np | ||
| import tqdm | ||
| import znh5md | ||
| import zntrack | ||
| from ase import Atoms | ||
| from ase.calculators.singlepoint import SinglePointCalculator | ||
| from MDAnalysis.auxiliary.EDR import EDRReader | ||
|
|
||
| _TYPE_TO_ELEMENT = { | ||
| "CL": "Cl", | ||
| "NA": "Na", | ||
| "MG": "Mg", | ||
| "ZN": "Zn", | ||
| "FE": "Fe", | ||
| "CA": "Ca", | ||
| "MN": "Mn", | ||
| "CU": "Cu", | ||
| "LI": "Li", | ||
| "AL": "Al", | ||
| "SI": "Si", | ||
| "BR": "Br", | ||
| "SE": "Se", | ||
| } | ||
|
|
||
|
|
||
| def _get_symbols(u: mda.Universe) -> list[str]: | ||
| """Extract element symbols from a Universe, trying multiple strategies.""" | ||
| # 1. Use elements attribute if available | ||
| try: | ||
| return list(u.atoms.elements) | ||
| except (mda.exceptions.NoDataError, AttributeError): | ||
| pass | ||
|
|
||
| # 2. Use atom types (usually cleaner than names for CHARMM-GUI) | ||
| types = u.atoms.types | ||
| symbols = [] | ||
| for t in types: | ||
| t_upper = t.upper() | ||
| if t_upper in _TYPE_TO_ELEMENT: | ||
| symbols.append(_TYPE_TO_ELEMENT[t_upper]) | ||
| elif len(t) <= 2 and t[0].isalpha(): | ||
| # Capitalize properly: first letter upper, rest lower | ||
| symbols.append(t[0].upper() + t[1:].lower() if len(t) > 1 else t.upper()) | ||
| else: | ||
| # Last resort: take leading alphabetic characters from atom name | ||
| symbols.append(t[0].upper()) | ||
| return symbols | ||
|
|
||
|
|
||
| def gmx_to_ase( | ||
| topology: str, | ||
| trajectory: str | None = None, | ||
| edr: str | None = None, | ||
| start: int | None = None, | ||
| stop: int | None = None, | ||
| step: int | None = None, | ||
| ) -> list[Atoms]: | ||
| """Convert a GROMACS trajectory to a list of ASE Atoms objects. | ||
|
|
||
| Extracts all available information: positions, velocities, forces, | ||
| and (via the .edr file) energies and stress. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| topology : str | ||
| Path to a GROMACS topology/structure file (.gro, .tpr). | ||
| trajectory : str | None | ||
| Path to a trajectory file (.xtc, .trr). If None, only the single | ||
| structure from the topology file is returned. | ||
| edr : str | None | ||
| Path to a GROMACS energy file (.edr). If given, per-frame energies | ||
| and stress tensors are attached via SinglePointCalculator. | ||
| start, stop, step : int | None | ||
| Slice parameters for selecting a subset of frames. | ||
|
|
||
| Returns | ||
| ------- | ||
| list[Atoms] | ||
| One ASE Atoms object per frame. Each Atoms has: | ||
| - positions (always) | ||
| - cell and pbc (always) | ||
| - velocities (if present in trajectory) | ||
| - forces (if present in trajectory, e.g. .trr) | ||
| - calculator with energy/stress/forces (if .edr provided or forces | ||
| present), plus all EDR terms stored in calc.results | ||
| """ | ||
| if trajectory is not None: | ||
| u = mda.Universe(topology, trajectory) | ||
| else: | ||
| u = mda.Universe(topology) | ||
|
|
||
| symbols = _get_symbols(u) | ||
|
|
||
| # Load EDR data if provided | ||
| edr_data = None | ||
| if edr is not None: | ||
| with warnings.catch_warnings(): | ||
| warnings.simplefilter("ignore") | ||
| reader = EDRReader(edr) | ||
| edr_all = reader.get_data(list(reader.terms)) | ||
| edr_times = edr_all.pop("Time") | ||
| edr_data = {term: values for term, values in edr_all.items()} | ||
| edr_terms = list(edr_data.keys()) | ||
|
|
||
| frames = [] | ||
| for ts in tqdm.tqdm(u.trajectory[start:stop:step]): | ||
| positions = ts.positions.copy() | ||
| box = ts.dimensions | ||
|
|
||
| atoms = Atoms(symbols=symbols, positions=positions, pbc=True) | ||
|
|
||
| if box is not None and all(box[:3] > 0): | ||
| atoms.set_cell(box, scale_atoms=False) | ||
|
Comment on lines
+154
to
+160
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Atoms are created with Proposed fix- atoms = Atoms(symbols=symbols, positions=positions, pbc=True)
-
- if box is not None and all(box[:3] > 0):
- atoms.set_cell(box, scale_atoms=False)
+ has_cell = box is not None and all(box[:3] > 0)
+ atoms = Atoms(symbols=symbols, positions=positions, pbc=has_cell)
+ if has_cell:
+ atoms.set_cell(box, scale_atoms=False)🤖 Prompt for AI Agents |
||
|
|
||
| # Velocities (e.g. from .gro or .trr) | ||
| if ts.has_velocities: | ||
| # MDAnalysis: Å/ps, ASE: Å/fs -> divide by 1000 | ||
| atoms.set_velocities(ts.velocities / 1000.0) | ||
|
Comment on lines
+162
to
+165
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🌐 Web query:
💡 Result: The ASE Atoms.set_velocities method expects velocities in ASE's internal units: length per time, where length is Ångström (Å = 1.0) and time is Å * sqrt(u / eV) ≈ 10.18 fs (since 1 fs ≈ 0.0982 in internal time units).[1][2] Velocities are dimensionless in the base system but interpreted as v_internal = Δposition / Δtime, with positions in Å and time derived from ase.units (eV=1, Å=1, u=1).[3][4][5] Kinetic energy is computed as sum(0.5 * m * v^2) in eV, confirming v has units sqrt(2*E/m) = sqrt(eV / u) / Å = 1 / time_unit.[6] To set velocities corresponding to a physical speed in fs^-1 (e.g., from VASP's Å/fs), scale by the time unit conversion: v_ase = v_physical * (1 fs / ase.units.fs) ≈ v_physical * 10.18, since ASE time unit ≈ 10.18 fs.[7][1] For example, 1 Å/fs in VASP requires ≈10.18 in ASE units to match kinetic energy.[7] Citations:
🏁 Script executed: # Find project configuration files to check ASE version
find . -maxdepth 2 -type f \( -name "setup.py" -o -name "setup.cfg" -o -name "pyproject.toml" -o -name "requirements.txt" -o -name "requirements*.txt" \) | head -20Repository: zincware/IPSuite Length of output: 77 🏁 Script executed: # Read the actual file to see current imports and code
head -50 ipsuite/data_loading/add_data_gromacs.pyRepository: zincware/IPSuite Length of output: 1189 🏁 Script executed: # Check the exact lines mentioned in the review
sed -n '155,175p' ipsuite/data_loading/add_data_gromacs.pyRepository: zincware/IPSuite Length of output: 820 🏁 Script executed: # Read pyproject.toml to find ASE version pinning
cat pyproject.toml | grep -A 20 -i "ase\|dependencies"Repository: zincware/IPSuite Length of output: 1917 🏁 Script executed: # Search for other uses of ase.units to see patterns
rg "from ase.units import|ase\.units\." --type py -B 2 -A 2Repository: zincware/IPSuite Length of output: 942 🏁 Script executed: # Look for other velocity or momentum related code
rg "velocit|momentum|set_velocit" --type py -B 2 -A 2 | head -100Repository: zincware/IPSuite Length of output: 5529 🏁 Script executed: # Search for tests related to gromacs loading
find . -name "*test*gromacs*" -o -name "*gromacs*test*" | head -10Repository: zincware/IPSuite Length of output: 42 🏁 Script executed: # Check if there are any test files for data_loading
find . -path "*/test*" -name "*add_data_gromacs*" -o -path "*/test*" -name "*gromacs*"Repository: zincware/IPSuite Length of output: 42 🏁 Script executed: # Look for any issue references or TODOs related to velocity
rg "TODO|FIXME|BUG|velocity.*unit|unit.*velocity" ipsuite/data_loading/add_data_gromacs.pyRepository: zincware/IPSuite Length of output: 42 🏁 Script executed: # Check what ase.units.fs actually equals by looking at ASE documentation or imports
python3 << 'EOF'
try:
from ase import units
print(f"ase.units.fs = {units.fs}")
print(f"1 / ase.units.fs = {1 / units.fs}")
print(f"ASE time unit in fs: {1/units.fs:.4f} fs")
except Exception as e:
print(f"Error: {e}")
EOFRepository: zincware/IPSuite Length of output: 89 🏁 Script executed: # Search for tests or examples using add_data_gromacs or trajectory loading
find . -type f -name "*.py" -exec grep -l "add_data_gromacs\|gromacs\|MDAnalysis.*trajectory" {} \; | head -20Repository: zincware/IPSuite Length of output: 180 🏁 Script executed: # Look for any set_velocities calls to see if there are other patterns
rg "set_velocities|get_velocities" --type py -B 3 -A 3Repository: zincware/IPSuite Length of output: 1320 🏁 Script executed: # Check if there are notebooks or examples demonstrating this
find . -name "*.ipynb" -o -name "*example*" -type f | grep -E "(gromacs|traj|velocity)" | head -10Repository: zincware/IPSuite Length of output: 42 🏁 Script executed: # Look at the comment in the current code more carefully - what does it claim ASE expects?
sed -n '160,170p' ipsuite/data_loading/add_data_gromacs.pyRepository: zincware/IPSuite Length of output: 533 Velocity unit conversion is incorrect — values are ~10× too small. MDAnalysis stores velocities in The proposed fix has the operation inverted—it multiplies by Corrected fix-from ase.units import kJ, mol
+from ase.units import fs, kJ, mol
@@
if ts.has_velocities:
- # MDAnalysis: Å/ps, ASE: Å/fs -> divide by 1000
- atoms.set_velocities(ts.velocities / 1000.0)
+ # MDAnalysis: Å/ps -> ASE internal velocity (Å / ASE time unit)
+ atoms.set_velocities(ts.velocities / (1000.0 * fs))🤖 Prompt for AI Agents |
||
|
|
||
| # Forces and energies via SinglePointCalculator | ||
| forces = ts.forces.copy() if ts.has_forces else None | ||
| energy = None | ||
| stress = None | ||
| extra_results = {} | ||
|
|
||
| if edr_data is not None: | ||
| # Match EDR frame to trajectory time | ||
| idx = np.argmin(np.abs(edr_times - ts.time)) | ||
| energy = float(edr_data["Potential"][idx]) # kJ/mol | ||
|
coderabbitai[bot] marked this conversation as resolved.
Outdated
|
||
|
|
||
| # Build Voigt stress from pressure tensor if available | ||
| try: | ||
| pxx = edr_data["Pres-XX"][idx] | ||
| pyy = edr_data["Pres-YY"][idx] | ||
| pzz = edr_data["Pres-ZZ"][idx] | ||
| pyz = edr_data["Pres-YZ"][idx] | ||
| pxz = edr_data["Pres-XZ"][idx] | ||
| pxy = edr_data["Pres-XY"][idx] | ||
| # GROMACS pressure in bar -> store as-is (not ASE native eV/ų) | ||
| stress = np.array([pxx, pyy, pzz, pyz, pxz, pxy]) | ||
|
Comment on lines
+186
to
+187
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Stress units mismatch: GROMACS pressure is in bar, ASE expects eV/ų. The comment acknowledges the unit discrepancy, but storing stress in bar breaks ASE's stress conventions. Any code calling Convert bar to eV/ų: 1 bar = 1e-4 GPa = 6.2415e-7 eV/ų (use Proposed fix+from ase import units
+
+# bar -> eV/ų
+BAR_TO_EV_ANG3 = 1.0 / (units.bar / (units.eV / units.Ang**3))
+
...
- # GROMACS pressure in bar -> store as-is (not ASE native eV/ų)
- stress = np.array([pxx, pyy, pzz, pyz, pxz, pxy])
+ # Convert GROMACS pressure (bar) to ASE stress (eV/ų)
+ stress = np.array([pxx, pyy, pzz, pyz, pxz, pxy]) * BAR_TO_EV_ANG3 |
||
| except KeyError: | ||
| pass | ||
|
|
||
| # Store all EDR terms for this frame | ||
| for term in edr_terms: | ||
| extra_results[term] = float(edr_data[term][idx]) | ||
|
Comment on lines
+191
to
+193
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
All EDR terms (kJ/mol energies, bar pressures, K temperatures, …) are written verbatim into 🤖 Prompt for AI Agents |
||
|
|
||
| if energy is not None or forces is not None: | ||
| calc = SinglePointCalculator( | ||
| atoms, | ||
| energy=energy, | ||
| forces=forces, | ||
| stress=stress, | ||
| ) | ||
| calc.results.update(extra_results) | ||
| atoms.calc = calc | ||
|
|
||
| frames.append(atoms) | ||
|
|
||
| return frames | ||
|
|
||
|
|
||
| class Gmx2Frames(zntrack.Node): | ||
| """Convert GROMACS output files to ASE Atoms frames. | ||
|
|
||
| Reads topology, trajectory, and optionally energy (.edr) files | ||
| to produce a list of ASE Atoms with positions, velocities, forces, | ||
| energies, and stress where available. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| topology : Path | ||
| Path to a GROMACS topology/structure file (.gro, .tpr). | ||
| trajectory : Path, optional | ||
| Path to a trajectory file (.xtc, .trr). | ||
| edr : Path, optional | ||
| Path to a GROMACS energy file (.edr). | ||
| start : int, optional | ||
| First frame index to read. | ||
| stop : int, optional | ||
| Last frame index (exclusive) to read. | ||
| step : int, optional | ||
| Step size for frame selection. | ||
|
|
||
| Examples | ||
| -------- | ||
| >>> with project: | ||
| ... md = ips.Gmx2Frames( | ||
| ... topology="gromacs/system.gro", | ||
| ... trajectory="gromacs/production.xtc", | ||
| ... edr="gromacs/production.edr", | ||
| ... start=1, | ||
| ... ) | ||
| """ | ||
|
Comment on lines
+232
to
+241
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Docstring example uses a non-existent class name and is not testable. The example references Proposed fix Examples
--------
- >>> with project:
- ... md = ips.Gmx2Frames(
- ... topology="gromacs/system.gro",
- ... trajectory="gromacs/production.xtc",
- ... edr="gromacs/production.edr",
- ... start=1,
- ... )
+ >>> project = ips.Project()
+ >>> with project:
+ ... md = ips.AddDataGMX(
+ ... topology="gromacs/system.gro",
+ ... trajectory="gromacs/production.xtc",
+ ... edr="gromacs/production.edr",
+ ... start=1,
+ ... )
+ >>> project.run()As per coding guidelines: "Each Node's docstring should include a testable example using 🤖 Prompt for AI Agents |
||
|
|
||
| topology: Path = zntrack.deps_path() | ||
| trajectory: Path = zntrack.deps_path(None) | ||
| edr: Path = zntrack.deps_path(None) | ||
| start: int = zntrack.params(None) | ||
| stop: int = zntrack.params(None) | ||
| step: int = zntrack.params(None) | ||
|
coderabbitai[bot] marked this conversation as resolved.
Outdated
|
||
|
|
||
| frames_path: Path = zntrack.outs_path(zntrack.nwd / "frames.h5") | ||
|
|
||
| def run(self): | ||
|
MrJulEnergy marked this conversation as resolved.
Outdated
|
||
| data = gmx_to_ase( | ||
| topology=str(self.topology), | ||
| trajectory=str(self.trajectory) if self.trajectory else None, | ||
| edr=str(self.edr) if self.edr else None, | ||
| start=self.start, | ||
| stop=self.stop, | ||
| step=self.step, | ||
| ) | ||
| frame_io = znh5md.IO(self.frames_path) | ||
| frame_io.extend(data) | ||
|
|
||
| @property | ||
| def frames(self) -> typing.List[Atoms]: | ||
| with self.state.fs.open(self.frames_path, "rb") as f: | ||
| with h5py.File(f) as file: | ||
| return znh5md.IO(file_handle=file)[:] | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| # Example: load the production trajectory with energies | ||
| frames = gmx_to_ase( | ||
| "gromacs/system.gro", | ||
| "gromacs/production.xtc", | ||
| edr="gromacs/production.edr", | ||
| ) | ||
| print(f"Loaded {len(frames)} frames, {len(frames[0])} atoms per frame") | ||
| print(f"Cell: {frames[0].cell.cellpar()}") | ||
| print(f"Potential energy (frame 0): {frames[0].get_potential_energy()} kJ/mol") | ||
| print(f"All EDR terms on frame 0: {list(frames[1].calc.results.keys())}") | ||
|
coderabbitai[bot] marked this conversation as resolved.
Outdated
|
||
Uh oh!
There was an error while loading. Please reload this page.