import subprocess
import numpy as np
import matplotlib.pyplot as plt
import plumed
import os
# small utility to clean up backup files:
def clean():
    print(subprocess.run(r"rm -f \#*; rm -fr bck.*",shell=True))
# small utility to run with mpiexec
def run_mpi(cmd,nprocs=1):
    return subprocess.run("""
module load intel/2021.1 gcc openmpi3 cuda
export PLUMED_KERNEL=/scratch/bussi/masterclass-21-7/install/lib/libplumedKernel.so
export LD_LIBRARY_PATH=/scratch/bussi/masterclass-21-7/install/lib:$LD_LIBRARY_PATH
export PATH=/scratch/bussi/masterclass-21-7/install/bin:$PATH
mpiexec -np {} {}
""".format(nprocs,cmd),shell=True)
# small utility to print the GROMACS performance report at the end of md.log
def parse_log(path="md.log"):
    with open(path,"r") as f:
        found=False
        for l in f:
            ll=l.split()
            # the performance report starts at the line "Core t (s) ..."
            if len(ll)>1 and ll[0]=="Core" and ll[1]=="t":
                found=True
            if found:
                print(l,end="")
clean()
# check executables
run_mpi("gmx_mpi")
run_mpi("plumed")
Exercise 1a
In this first exercise, we optimize the settings for the METAD action.
We first compute the basic performance using the recommended input file.
with open("plumed-a1.dat","w") as f:
print("""
# vim:ft=plumed
DEBUG DETAILED_TIMERS
NA: GROUP ATOMS=1
CL: GROUP ATOMS=2
WAT: GROUP ATOMS=3-8544:3
d: DISTANCE ATOMS=NA,CL
cn: COORDINATION GROUPA=NA GROUPB=WAT R_0=0.3
PRINT ARG=d,cn STRIDE=100 FILE=COLVAR
METAD ARG=d,cn SIGMA=0.05,0.1 HEIGHT=0.1 PACE=10 BIASFACTOR=5
""",file=f)
run_mpi("gmx_mpi mdrun -plumed plumed-a1.dat -nsteps 5000 -ntomp 12 -pin on",1)
parse_log()
col=plumed.read_as_pandas("COLVAR")
plt.plot(col.d,col.cn,"x")
We then check if the performance changes by using a grid.
with open("plumed-a2.dat","w") as f:
print("""
# vim:ft=plumed
DEBUG DETAILED_TIMERS
NA: GROUP ATOMS=1
CL: GROUP ATOMS=2
WAT: GROUP ATOMS=3-8544:3
d: DISTANCE ATOMS=NA,CL
cn: COORDINATION GROUPA=NA GROUPB=WAT R_0=0.3
PRINT ARG=d,cn STRIDE=100 FILE=COLVAR
METAD ARG=d,cn SIGMA=0.05,0.1 HEIGHT=0.1 PACE=10 BIASFACTOR=5 GRID_MIN=0,4 GRID_MAX=4,8
""",file=f)
run_mpi("gmx_mpi mdrun -plumed plumed-a2.dat -nsteps 5000 -ntomp 12 -pin on",1)
parse_log()
The change is limited; there is actually a small slowdown. However, we know that in the long run (when many hills have been deposited) using a grid will be critical for performance, so we keep the grid.
We then test what happens when using a larger PACE (with a proportionally larger HEIGHT). Notice that, in the long run, the hill height typically becomes so small (due to BIASFACTOR) that each individual hill is a tiny fraction of kBT. In this regime, changing PACE has no effect, provided HEIGHT is changed by the same factor.
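As a small numerical illustration of this point (added here, not part of the original notebook), what matters in this regime is the average deposition rate HEIGHT/PACE: once the well-tempered decay factor is applied, the two settings used in these inputs give the same rate regardless of how much bias has already accumulated.
# Minimal sketch, assuming kBT ~ 2.5 kJ/mol and the well-tempered decay
# W = HEIGHT * exp(-V/((BIASFACTOR-1)*kBT)) of the deposited hill height.
kBT = 2.5
gamma = 5                                   # BIASFACTOR used above
for V in [0.0, 5.0, 20.0]:                  # hypothetical accumulated bias, kJ/mol
    for height, pace in [(0.1, 10), (1.0, 100)]:
        W = height * np.exp(-V / ((gamma - 1) * kBT))
        print(f"V={V:5.1f}  HEIGHT={height:4.1f} PACE={pace:4d}  rate={W/pace:.6f} kJ/mol per step")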
with open("plumed-a3.dat","w") as f:
print("""
# vim:ft=plumed
DEBUG DETAILED_TIMERS
NA: GROUP ATOMS=1
CL: GROUP ATOMS=2
WAT: GROUP ATOMS=3-8544:3
d: DISTANCE ATOMS=NA,CL
cn: COORDINATION GROUPA=NA GROUPB=WAT R_0=0.3
PRINT ARG=d,cn STRIDE=100 FILE=COLVAR
METAD ARG=d,cn SIGMA=0.05,0.1 HEIGHT=1.0 PACE=100 BIASFACTOR=5 GRID_MIN=0,4 GRID_MAX=4,8
""",file=f)
run_mpi("gmx_mpi mdrun -plumed plumed-a3.dat -nsteps 5000 -ntomp 12 -pin on",1)
parse_log()
Little impact. We keep these settings, even though for this system we did not gain much.
Exercise 1b
Next we proceed to optimize the calculation of COORDINATION. In order to use neighbor lists safely, we have to make sure that the switching function goes exactly to zero; this can be done by setting D_MAX.
We then test different values of D_MAX, checking whether they lead to different values of the COORDINATION variable.
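As a visual aid (added here, not part of the original notebook), the sketch below plots the bare RATIONAL switching function with R_0=0.3 and mimics the stretching that PLUMED applies by default when D_MAX is set, which makes the function reach exactly zero at D_MAX.
# Bare RATIONAL function s(r) = (1-(r/R_0)^6)/(1-(r/R_0)^12) and, for each D_MAX,
# a stretched-and-shifted version with s(0)=1 and s(D_MAX)=0.
r = np.linspace(0.05, 1.6, 400)
r0 = 0.3
s = (1 - (r/r0)**6) / (1 - (r/r0)**12)
plt.plot(r, s, label="bare RATIONAL R_0=0.3")
for dmax in [0.75, 1.0, 1.25, 1.5]:
    s_dmax = (1 - (dmax/r0)**6) / (1 - (dmax/r0)**12)
    stretch = 1.0 / (1.0 - s_dmax)      # the bare function is 1 at r=0
    shift = -s_dmax * stretch
    plt.plot(r[r<=dmax], s[r<=dmax]*stretch + shift, "--", label="D_MAX="+str(dmax))
plt.xlabel("r (nm)")
plt.ylabel("s(r)")
plt.legend()
plt.show()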
with open("plumed-b1.dat","w") as f:
print("""
# vim:ft=plumed
DEBUG DETAILED_TIMERS
NA: GROUP ATOMS=1
CL: GROUP ATOMS=2
WAT: GROUP ATOMS=3-8544:3
d: DISTANCE ATOMS=NA,CL
cn: COORDINATION GROUPA=NA GROUPB=WAT R_0=0.3
cn1: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75}
cn2: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=1.0}
cn3: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=1.25}
cn4: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=1.5}
PRINT ARG=d,cn STRIDE=100 FILE=COLVAR
METAD ARG=d,cn SIGMA=0.05,0.1 HEIGHT=1.0 PACE=100 BIASFACTOR=5 GRID_MIN=0,4 GRID_MAX=4,8
PRINT ARG=cn,cn1,cn2,cn3,cn4 FILE=TEST
""",file=f)
run_mpi("gmx_mpi mdrun -plumed plumed-b1.dat -nsteps 5000 -ntomp 12 -pin on",1)
parse_log()
test=plumed.read_as_pandas("TEST")
test
plt.plot(test.cn,test.cn1,"x")
plt.plot(test.cn,test.cn2,"x")
plt.plot(test.cn,test.cn3,"x")
plt.plot(test.cn,test.cn4,"x")
The obtained values are equivalent up to a uniform shift, so it is certainly fine to use the smallest value (0.75).
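As a quick check of this statement (added here, not in the original notebook), the difference between each D_MAX variant and the plain cn should have a standard deviation much smaller than its mean:
for name in ["cn1","cn2","cn3","cn4"]:
    diff = test[name] - test.cn
    print(name, "mean shift:", np.mean(diff), "std of shift:", np.std(diff))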
We now test neighbor lists. To do so, we try different buffer widths and different strides. Notice that a larger buffer (NL_CUTOFF - D_MAX) allows a larger stride to be used safely. However, increasing the stride too much is not useful.
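To see why, here is a back-of-the-envelope estimate (added here, not in the original notebook; it assumes the 2 fs time step of these runs and uses the thermal speed of a water molecule as a crude bound on how fast two atoms can approach each other). Between two neighbor-list updates, no pair should travel further than the buffer NL_CUTOFF - D_MAX.
# Worst-case relative displacement between neighbor-list updates, to be compared
# with the buffers 0.8-0.75 = 0.05 nm and 1.0-0.75 = 0.25 nm used below.
RT = 2.5e3                              # J/mol at ~300 K
m_water = 18e-3                         # kg/mol
v = np.sqrt(3*RT/m_water) * 1e-3        # thermal speed in nm/ps (1 m/s = 1e-3 nm/ps)
dt = 0.002                              # ps, assumed MD time step
for stride in [5, 10, 20, 50, 100, 200]:
    drift = 2 * v * dt * stride         # two atoms moving towards each other
    print(f"NL_STRIDE={stride:3d}: worst-case relative drift ~ {drift:.3f} nm")
With the 0.05 nm buffer this suggests that strides around 10 are safe while 20 is already borderline, consistent with what is observed below.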
with open("plumed-b2.dat","w") as f:
print("""
# vim:ft=plumed
DEBUG DETAILED_TIMERS
NA: GROUP ATOMS=1
CL: GROUP ATOMS=2
WAT: GROUP ATOMS=3-8544:3
d: DISTANCE ATOMS=NA,CL
cn: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75}
cn1: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=0.8 NL_STRIDE=5
cn2: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=0.8 NL_STRIDE=10
cn3: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=0.8 NL_STRIDE=20
cn4: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=0.8 NL_STRIDE=50
cn5: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=0.8 NL_STRIDE=100
cn6: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=0.8 NL_STRIDE=200
cn7: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=1.0 NL_STRIDE=5
cn8: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=1.0 NL_STRIDE=10
cn9: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=1.0 NL_STRIDE=20
cn10: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=1.0 NL_STRIDE=50
cn11: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=1.0 NL_STRIDE=100
cn12: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=1.0 NL_STRIDE=200
PRINT ARG=d,cn STRIDE=100 FILE=COLVAR
METAD ARG=d,cn SIGMA=0.05,0.1 HEIGHT=1.0 PACE=100 BIASFACTOR=5 GRID_MIN=0,4 GRID_MAX=4,8
PRINT ARG=(cn.*) FILE=TEST
""",file=f)
run_mpi("gmx_mpi mdrun -plumed plumed-b2.dat -nsteps 5000 -ntomp 12 -pin on",1)
parse_log()
We also compare the values:
test=plumed.read_as_pandas("TEST")
test
for i in range(6,0,-1):
    plt.plot(test["cn"+str(i)]-test.cn,"x",label=str(i))
plt.legend()
plt.show()
for i in range(12,6,-1):
    plt.plot(test["cn"+str(i)]-test.cn,"x",label=str(i))
plt.legend()
plt.show()
We will use the settings corresponding to cn2:
cn2: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=0.8 NL_STRIDE=10
Notice that cn3 sometimes fails to return the correct value, and all the variants with NL_CUTOFF=1.0 are slower.
np.std(test.cn3-test.cn), np.std(test.cn2-test.cn)
np.std(test.cn11-test.cn), np.std(test.cn10-test.cn)
Exercise 1c
We then try to optimize the number of cores.
with open("plumed-c1.dat","w") as f:
print("""
# vim:ft=plumed
DEBUG DETAILED_TIMERS
NA: GROUP ATOMS=1
CL: GROUP ATOMS=2
WAT: GROUP ATOMS=3-8544:3
d: DISTANCE ATOMS=NA,CL
cn: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=0.8 NL_STRIDE=10
PRINT ARG=d,cn STRIDE=100 FILE=COLVAR
METAD ARG=d,cn SIGMA=0.05,0.1 HEIGHT=1.0 PACE=100 BIASFACTOR=5 GRID_MIN=0,4 GRID_MAX=4,8
""",file=f)
run_mpi("gmx_mpi mdrun -plumed plumed-c1.dat -nsteps 5000 -ntomp 12 -pin on",1)
parse_log()
run_mpi("gmx_mpi mdrun -plumed plumed-c1.dat -nsteps 5000 -ntomp 6 -pin on",2)
parse_log()
run_mpi("gmx_mpi mdrun -plumed plumed-c1.dat -nsteps 5000 -ntomp 24 -pin on",1)
parse_log()
On this machine it is convenient to use 1 MPI process and 12 threads.
Exercise 1d
Next we try different settings for metadynamics, monitoring the number of binding/unbinding events that we observe in a fixed simulation time.
I first try using wider hills.
with open("plumed-d1.dat","w") as f:
print("""
# vim:ft=plumed
DEBUG DETAILED_TIMERS
NA: GROUP ATOMS=1
CL: GROUP ATOMS=2
WAT: GROUP ATOMS=3-8544:3
d: DISTANCE ATOMS=NA,CL
cn: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=0.8 NL_STRIDE=10
PRINT ARG=d,cn STRIDE=100 FILE=COLVAR
METAD ARG=d,cn SIGMA=0.05,0.1 HEIGHT=1.0 PACE=100 BIASFACTOR=5 GRID_MIN=0,0 GRID_MAX=4,10
""",file=f)
run_mpi("gmx_mpi mdrun -plumed plumed-d1.dat -nsteps 500000 -ntomp 12 -pin on",1)
! mv HILLS HILLS_1
! mv traj_comp.xtc traj_comp1.xtc
with open("plumed-d2.dat","w") as f:
print("""
# vim:ft=plumed
DEBUG DETAILED_TIMERS
NA: GROUP ATOMS=1
CL: GROUP ATOMS=2
WAT: GROUP ATOMS=3-8544:3
d: DISTANCE ATOMS=NA,CL
cn: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=0.8 NL_STRIDE=10
PRINT ARG=d,cn STRIDE=100 FILE=COLVAR
METAD ARG=d,cn SIGMA=0.1,0.2 HEIGHT=1.0 PACE=100 BIASFACTOR=5 GRID_MIN=0,0 GRID_MAX=4,10
""",file=f)
run_mpi("gmx_mpi mdrun -plumed plumed-d2.dat -nsteps 500000 -ntomp 12 -pin on",1)
! mv HILLS HILLS_2
! mv traj_comp.xtc traj_comp2.xtc
!plumed sum_hills --hills HILLS_1 --outfile fes1.dat --idw d --kt 2.5
!plumed sum_hills --hills HILLS_2 --outfile fes2.dat --idw d --kt 2.5
fes1=plumed.read_as_pandas("fes1.dat")
fes2=plumed.read_as_pandas("fes2.dat")
plt.plot(fes1.d,fes1.projection)
plt.plot(fes2.d,fes2.projection)
h=plumed.read_as_pandas("HILLS_1")
plt.plot(h.d)
h=plumed.read_as_pandas("HILLS_2")
plt.plot(h.d)
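To go beyond eyeballing these plots, a small helper like the following (hypothetical, added here; it is not part of the original notebook) can count transitions between a bound and an unbound region of d. The 0.35 nm and 0.7 nm thresholds are arbitrary choices; frames in between keep the previously assigned state so that fast recrossings are not counted.
def count_events(d, bound=0.35, unbound=0.7):
    state = None
    events = 0
    for x in np.asarray(d):
        if x < bound:
            if state == "unbound":
                events += 1
            state = "bound"
        elif x > unbound:
            if state == "bound":
                events += 1
            state = "unbound"
    return events
print("events with SIGMA=0.05,0.1:", count_events(plumed.read_as_pandas("HILLS_1").d))
print("events with SIGMA=0.1,0.2:", count_events(plumed.read_as_pandas("HILLS_2").d))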
Looking at the number of events, using wider hills does not seem useful.
I then try to remove one variable, making the calculation 1D, and to restrict the domain by using walls.
with open("plumed-d3.dat","w") as f:
print("""
# vim:ft=plumed
DEBUG DETAILED_TIMERS
NA: GROUP ATOMS=1
CL: GROUP ATOMS=2
WAT: GROUP ATOMS=3-8544:3
d: DISTANCE ATOMS=NA,CL
cn: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=0.8 NL_STRIDE=10
PRINT ARG=d,cn STRIDE=100 FILE=COLVAR
METAD ARG=d SIGMA=0.05 HEIGHT=1.0 PACE=100 BIASFACTOR=5 GRID_MIN=0 GRID_MAX=4
""",file=f)
run_mpi("gmx_mpi mdrun -plumed plumed-d3.dat -nsteps 500000 -ntomp 12 -pin on",1)
! mv HILLS HILLS_3
! mv traj_comp.xtc traj_comp3.xtc
with open("plumed-d4.dat","w") as f:
print("""
# vim:ft=plumed
DEBUG DETAILED_TIMERS
NA: GROUP ATOMS=1
CL: GROUP ATOMS=2
WAT: GROUP ATOMS=3-8544:3
d: DISTANCE ATOMS=NA,CL
cn: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=0.8 NL_STRIDE=10
PRINT ARG=d,cn STRIDE=100 FILE=COLVAR
UPPER_WALLS ARG=d AT=1.5 KAPPA=1000.0
METAD ARG=d SIGMA=0.05 HEIGHT=1.0 PACE=100 BIASFACTOR=5 GRID_MIN=0 GRID_MAX=4
""",file=f)
run_mpi("gmx_mpi mdrun -plumed plumed-d4.dat -nsteps 500000 -ntomp 12 -pin on",1)
! mv HILLS HILLS_4
! mv traj_comp.xtc traj_comp4.xtc
h=plumed.read_as_pandas("HILLS_3")
plt.plot(h.d)
h=plumed.read_as_pandas("HILLS_4")
plt.plot(h.d)
It seems the coordination number was not really needed for this problem. I can also see that by restricting the domain I can observe more events.
!plumed sum_hills --hills HILLS_3 --outfile fes3.dat
!plumed sum_hills --hills HILLS_4 --outfile fes4.dat
fes3=plumed.read_as_pandas("fes3.dat")
fes4=plumed.read_as_pandas("fes4.dat")
plt.plot(fes3.d,fes3["file.free"])
plt.plot(fes3.d,fes3["file.free"]+2*2.5*np.log(fes3.d))
plt.plot(fes4.d,fes4["file.free"])
plt.plot(fes4.d,fes4["file.free"]+2*2.5*np.log(fes4.d))
I notice that the bound minimum is perhaps narrower than expected, so I try to decrease the hill width. Notice that I am not explicitly computing the standard deviation of the variable here; that would probably have been wiser.
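If one wanted to pick SIGMA from the data, as suggested above, a quick estimate (hypothetical, not performed in the original notebook; it assumes COLVAR still holds the output of the previous run) would be the standard deviation of d inside the bound basin:
col = plumed.read_as_pandas("COLVAR")
bound_frames = col.d[col.d < 0.35]      # crude definition of the bound basin
print("std of d in the bound basin:", np.std(bound_frames))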
with open("plumed-d5.dat","w") as f:
print("""
# vim:ft=plumed
DEBUG DETAILED_TIMERS
NA: GROUP ATOMS=1
CL: GROUP ATOMS=2
WAT: GROUP ATOMS=3-8544:3
d: DISTANCE ATOMS=NA,CL
cn: COORDINATION GROUPA=NA GROUPB=WAT SWITCH={RATIONAL R_0=0.3 D_MAX=0.75} NLIST NL_CUTOFF=0.8 NL_STRIDE=10
PRINT ARG=d,cn STRIDE=100 FILE=COLVAR
UPPER_WALLS ARG=d AT=1.5 KAPPA=1000.0
METAD ARG=d SIGMA=0.025 HEIGHT=1.0 PACE=100 BIASFACTOR=5 GRID_MIN=0 GRID_MAX=4
""",file=f)
run_mpi("gmx_mpi mdrun -plumed plumed-d5.dat -nsteps 500000 -ntomp 12 -pin on",1)
! mv HILLS HILLS_5
! mv traj_comp.xtc traj_comp5.xtc
h=plumed.read_as_pandas("HILLS_5")
plt.plot(h.d)
!plumed sum_hills --hills HILLS_5 --outfile fes5.dat
fes3=plumed.read_as_pandas("fes3.dat")
fes4=plumed.read_as_pandas("fes4.dat")
fes5=plumed.read_as_pandas("fes5.dat")
plt.plot(fes3.d,fes3["file.free"])
plt.plot(fes3.d,fes3["file.free"]+2*2.5*np.log(fes3.d))
plt.plot(fes4.d,fes4["file.free"])
plt.plot(fes4.d,fes4["file.free"]+2*2.5*np.log(fes4.d))
plt.plot(fes5.d,fes5["file.free"])
plt.plot(fes5.d,fes5["file.free"]+2*2.5*np.log(fes5.d))
0.1 m = 1e8 nm
1 L = 1 dm³ = (1e8 nm)³ = 1e24 nm³
1 L / N_A = 1e24 nm³ / 6.02214076e23 ≈ 1.66 nm³ per molecule (the standard-state volume V0 used below)
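This number can be verified directly (a small check added here, not in the original notebook):
# volume per molecule at the 1 M standard concentration, used as V0 below
print("V0 =", 1e24/6.02214076e23, "nm^3")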
def deltaG(fes,dbound,dmin,dmax):
    # Boltzmann-weighted population of the bound state (d < dbound), with kBT = 2.5 kJ/mol
    bound=np.sum(np.exp(-np.array(fes["file.free"])[np.where(fes.d<dbound)]/2.5))
    # Boltzmann-weighted population of the unbound shell dmin < d < dmax
    unbound=np.sum(np.exp(-np.array(fes["file.free"])[np.where(np.logical_and(fes.d>dmin,fes.d<dmax))]/2.5))
    # normalize the unbound population to the standard-state volume V0 = 1.66 nm^3
    Vunbound=4/3*np.pi*(dmax**3-dmin**3)
    V0=1.66
    unbound /= (Vunbound/V0)
    return -2.5*np.log(bound/unbound)
deltaG(fes5,0.4,1.0,1.4)
It seems I have a reasonable setup. I will then run it for 10 ns.
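As a quick sanity check of the simulation length (added here; it assumes the 2 fs time step typical of these setups, consistent with the 10 ns stated above):
print(5000000*0.002/1000, "ns")   # 5,000,000 steps x 2 fs = 10 ns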
run_mpi("gmx_mpi mdrun -plumed plumed-d5.dat -nsteps 5000000 -ntomp 12 -pin on",1)
! mv HILLS HILLS_5L
! mv traj_comp.xtc traj_comp5L.xtc
h=plumed.read_as_pandas("HILLS_5L")
plt.plot(h.d)
!plumed sum_hills --hills HILLS_5L --outfile fes5l.dat
fes5l=plumed.read_as_pandas("fes5l.dat")
plt.plot(fes5l.d,fes5l["file.free"])
plt.plot(fes5l.d,fes5l["file.free"]+2*2.5*np.log(fes5l.d))
dg=[]
for dmin in np.linspace(0.5,1.2):
    dg.append(deltaG(fes5l,0.4,dmin,1.4))
plt.plot(dg)
Now I try to compute the statistical error with bootstrapping. In order to avoid any issues with the tails of the Gaussians, I recompute the weights on the sampled frames rather than relying on sum_hills.
with open("plumed-d5-rew.dat","w") as f:
print("""
# vim:ft=plumed
d: READ FILE=HILLS_5L VALUES=d IGNORE_FORCES IGNORE_TIME
walls: UPPER_WALLS ARG=d AT=1.5 KAPPA=1000.0
metad: METAD ARG=d SIGMA=0.025 HEIGHT=0.0 PACE=1000000 BIASFACTOR=5 GRID_MIN=0 GRID_MAX=4 RESTART=YES TEMP=300 FILE=HILLS_5L
PRINT FILE=5rew ARG=d,*.bias
""",file=f)
run_mpi("env PLUMED_NUM_THREADS=12 plumed driver --plumed plumed-d5-rew.dat --noatoms",1)
rew=plumed.read_as_pandas("5rew")
rew
d=np.array(rew.d)
# reweighting: each frame carries a weight w proportional to exp(V_bias/kBT), with kBT = 2.5 kJ/mol
w=np.array(np.exp(rew["metad.bias"]/2.5))
def deltaG(d,w,dbound,dmin,dmax):
    # weighted population of the bound state (d < dbound)
    bound=np.sum((d<dbound)*w)
    # weighted population of the unbound shell dmin < d < dmax
    unbound=np.sum(np.logical_and(d>dmin,d<dmax)*w)
    # normalize the unbound population to the standard-state volume V0 = 1.66 nm^3
    Vunbound=4/3*np.pi*(dmax**3-dmin**3)
    V0=1.66
    unbound /= (Vunbound/V0)
    return -2.5*np.log(bound/unbound)
deltaG(d,w,0.4,1.0,1.4)
# block bootstrap with 10 blocks
dG=[]
NB=10
for i in range(1000):
    c=np.random.choice(NB,NB)
    dd=d.reshape((NB,-1))[c].flatten()
    ww=w.reshape((NB,-1))[c].flatten()
    dG.append(deltaG(dd,ww,0.4,1.0,1.4))
np.average(dG),np.std(dG)
The standard binding free energy is very close to zero, within its error. Notice that a value of zero has no particular meaning, since it depends on the arbitrary choice of the standard-state volume. Also notice that computing the affinity from the sampled values (rather than from sum_hills) is more reliable, since it makes it possible to detect whether the ions are before or after the first free-energy barrier, without any spurious effect from the Gaussian tails.