import plumed
import matplotlib.pyplot as plt
import numpy as np
import subprocess
# small utility to clean up backup files:
def clean():
print(subprocess.run("rm -f \#*; rm -fr bck.*",shell=True))
Exercise 1¶
In this first exercise we analyze a simulation on the fly
with open("plumed.dat","w") as f:
print("""
# vim:ft=plumed
MOLINFO STRUCTURE=reference.pdb
phi: TORSION ATOMS=@phi-2
psi: TORSION ATOMS=@psi-2
# use the command below to compute the histogram of phi
# we use a smooth kernel to produce a nicer graph here
hhphi: HISTOGRAM ARG=phi STRIDE=100 GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05
ffphi: CONVERT_TO_FES GRID=hhphi # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffphi FILE=fes_phi.dat STRIDE=200000 # stride is needed here since PLUMED does not know when the simulation is over
hhpsi: HISTOGRAM ARG=psi STRIDE=100 GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05
ffpsi: CONVERT_TO_FES GRID=hhpsi # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffpsi FILE=fes_psi.dat STRIDE=200000 # stride is needed here since PLUMED does not know when the simulation is over
PRINT ARG=phi,psi FILE=colvar.dat STRIDE=100 # use this command to write phi and psi on a file named colvar.dat, every 100 steps
""",file=f)
clean()
subprocess.run("gmx mdrun -plumed plumed.dat -s topolA.tpr -nsteps 200000 -x traj_comp_unbiased.xtc",shell=True)
colvar=plumed.read_as_pandas("colvar.dat")
plt.plot(colvar.time,colvar.phi,"x",label="phi")
plt.plot(colvar.time,colvar.psi,"x",label="psi")
plt.xlabel("time")
plt.ylabel("$\phi$")
plt.legend()
plt.show()
plt.plot(colvar.phi,colvar.psi,"x")
plt.xlabel("$\phi$")
plt.ylabel("$\psi$")
plt.xlim((-np.pi,np.pi))
plt.ylim((-np.pi,np.pi))
plt.show()
fes_phi=plumed.read_as_pandas("fes_phi.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_phi.phi,fes_phi.ffphi)
plt.xlim((-np.pi,np.pi))
plt.xlabel("$\phi$")
plt.ylabel("$F(\phi)$")
plt.show()
fes_psi=plumed.read_as_pandas("fes_psi.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_psi.psi,fes_psi.ffpsi)
plt.xlim((-np.pi,np.pi))
plt.xlabel("$\psi$")
plt.ylabel("$F(\psi)$")
plt.show()
Comments:
- Both psi and phi display many transitions between two minima
- Clearly, the two variables are correlated in that the two metastable states can be equivalently distinguished using phi or using psi
Exercise 2¶
We now run a biased MD acting on variable phi
with open("plumed_biased1.dat","w") as f:
print("""
# vim:ft=plumed
MOLINFO STRUCTURE=reference.pdb
phi: TORSION ATOMS=@phi-2
psi: TORSION ATOMS=@psi-2
f: CUSTOM ARG=phi FUNC=-10*sin(x+2) PERIODIC=NO
BIASVALUE ARG=f
lw: REWEIGHT_BIAS
# use the command below to compute the histogram of phi
# we use a smooth kernel to produce a nicer graph here
hhphi: HISTOGRAM ARG=phi STRIDE=100 GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05
ffphi: CONVERT_TO_FES GRID=hhphi # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffphi FILE=fes_phi_biased1.dat STRIDE=200000 # stride is needed here since PLUMED does not know when the simulation is over
hhpsi: HISTOGRAM ARG=psi STRIDE=100 GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05
ffpsi: CONVERT_TO_FES GRID=hhpsi # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffpsi FILE=fes_psi_biased1.dat STRIDE=200000 # stride is needed here since PLUMED does not know when the simulation is over
# we use a smooth kernel to produce a nicer graph here
hhphir: HISTOGRAM ARG=phi STRIDE=100 GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05 LOGWEIGHTS=lw
ffphir: CONVERT_TO_FES GRID=hhphir # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffphir FILE=fes_phi_biased1r.dat STRIDE=200000 # stride is needed here since PLUMED does not know when the simulation is over
hhpsir: HISTOGRAM ARG=psi STRIDE=100 GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05 LOGWEIGHTS=lw
ffpsir: CONVERT_TO_FES GRID=hhpsir # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffpsir FILE=fes_psi_biased1r.dat STRIDE=200000 # stride is needed here since PLUMED does not know when the simulation is over
PRINT ARG=phi,psi FILE=colvar_biased1.dat STRIDE=100 # use this command to write phi and psi on a file named colvar.dat, every 100 steps
""",file=f)
clean()
subprocess.run("gmx mdrun -plumed plumed_biased1.dat -s topolA.tpr -nsteps 200000 -x traj_comp_biased1.xtc",shell=True)
colvar=plumed.read_as_pandas("colvar_biased1.dat")
plt.plot(colvar.time,colvar.phi,"x",label="phi")
plt.plot(colvar.time,colvar.psi,"x",label="psi")
plt.xlabel("time")
plt.ylabel("$\phi$")
plt.legend()
plt.show()
plt.plot(colvar.phi,colvar.psi,"x")
plt.xlabel("$\phi$")
plt.ylabel("$\psi$")
plt.xlim((-np.pi,np.pi))
plt.ylim((-np.pi,np.pi))
plt.show()
fes_phi=plumed.read_as_pandas("fes_phi.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_phi.phi,fes_phi.ffphi,label="original")
fes_phib=plumed.read_as_pandas("fes_phi_biased1.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_phib.phi,fes_phib.ffphi,label="biased")
fes_phir=plumed.read_as_pandas("fes_phi_biased1r.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_phir.phi,fes_phir.ffphir,label="reweighted")
plt.legend()
plt.xlim((-np.pi,np.pi))
plt.xlabel("$\phi$")
plt.ylabel("$F(\phi)$")
plt.show()
fes_psi=plumed.read_as_pandas("fes_psi.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_psi.psi,fes_psi.ffpsi,label="original")
fes_psib=plumed.read_as_pandas("fes_psi_biased1.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_psib.psi,fes_psib.ffpsi,label="biased")
fes_psir=plumed.read_as_pandas("fes_psi_biased1r.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_psir.psi,fes_psir.ffpsir,label="reweighted")
plt.legend()
plt.xlim((-np.pi,np.pi))
plt.xlabel("$\psi$")
plt.ylabel("$F(\psi)$")
plt.show()
Comments:
- In the biased free energy it seems that minimum at higher phi (lower psi) is more stable
- When "unbiasing" the result the two minima seems more similar.
- The free energy obtained after unbiasing is not identical to the one obtained in the original simulation. This is simply because both simulations have been run for a finite number of steps. The results should be consistent within their statistical errors.
Exercise 3¶
We will now run a third simulation that is even more biased and then combine the three simulations using binless WHAM.
with open("plumed_biased2.dat","w") as f:
print("""
# vim:ft=plumed
MOLINFO STRUCTURE=reference.pdb
phi: TORSION ATOMS=@phi-2
psi: TORSION ATOMS=@psi-2
f: CUSTOM ARG=phi FUNC=-20*sin(x+2) PERIODIC=NO
BIASVALUE ARG=f
lw: REWEIGHT_BIAS
# use the command below to compute the histogram of phi
# we use a smooth kernel to produce a nicer graph here
hhphi: HISTOGRAM ARG=phi STRIDE=100 GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05
ffphi: CONVERT_TO_FES GRID=hhphi # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffphi FILE=fes_phi_biased2.dat STRIDE=200000 # stride is needed here since PLUMED does not know when the simulation is over
hhpsi: HISTOGRAM ARG=psi STRIDE=100 GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05
ffpsi: CONVERT_TO_FES GRID=hhpsi # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffpsi FILE=fes_psi_biased2.dat STRIDE=200000 # stride is needed here since PLUMED does not know when the simulation is over
# we use a smooth kernel to produce a nicer graph here
hhphir: HISTOGRAM ARG=phi STRIDE=100 GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05 LOGWEIGHTS=lw
ffphir: CONVERT_TO_FES GRID=hhphir # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffphir FILE=fes_phi_biased2r.dat STRIDE=200000 # stride is needed here since PLUMED does not know when the simulation is over
hhpsir: HISTOGRAM ARG=psi STRIDE=100 GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05 LOGWEIGHTS=lw
ffpsir: CONVERT_TO_FES GRID=hhpsir # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffpsir FILE=fes_psi_biased2r.dat STRIDE=200000 # stride is needed here since PLUMED does not know when the simulation is over
PRINT ARG=phi,psi FILE=colvar_biased2.dat STRIDE=100 # use this command to write phi and psi on a file named colvar.dat, every 100 steps
""",file=f)
subprocess.run("gmx mdrun -plumed plumed_biased2.dat -s topolA.tpr -nsteps 200000 -x traj_comp_biased2.xtc",shell=True)
colvar=plumed.read_as_pandas("colvar_biased2.dat")
plt.plot(colvar.time,colvar.phi,"x",label="phi")
plt.plot(colvar.time,colvar.psi,"x",label="psi")
plt.xlabel("time")
plt.ylabel("$\phi$")
plt.legend()
plt.show()
plt.plot(colvar.phi,colvar.psi,"x")
plt.xlabel("$\phi$")
plt.ylabel("$\psi$")
plt.xlim((-np.pi,np.pi))
plt.ylim((-np.pi,np.pi))
plt.show()
fes_phi=plumed.read_as_pandas("fes_phi.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_phi.phi,fes_phi.ffphi,label="original")
fes_phib=plumed.read_as_pandas("fes_phi_biased2.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_phib.phi,fes_phib.ffphi,label="biased")
fes_phir=plumed.read_as_pandas("fes_phi_biased2r.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_phir.phi,fes_phir.ffphir,label="reweighted")
plt.legend()
plt.xlim((-np.pi,np.pi))
plt.xlabel("$\phi$")
plt.ylabel("$F(\phi)$")
plt.show()
fes_psi=plumed.read_as_pandas("fes_psi.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_psi.psi,fes_psi.ffpsi,label="original")
fes_psib=plumed.read_as_pandas("fes_psi_biased2.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_psib.psi,fes_psib.ffpsi,label="biased")
fes_psir=plumed.read_as_pandas("fes_psi_biased2r.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_psir.psi,fes_psir.ffpsir,label="reweighted")
plt.legend()
plt.xlim((-np.pi,np.pi))
plt.xlabel("$\psi$")
plt.ylabel("$F(\psi)$")
plt.show()
Comments:
- The third simulation is even more biased towards the second minimum
- When "unbiasing" the corresponding profile, we clearly cannot recover the original (blue) profile. However, the correction clearly points in the correct direction, making the stability of the two minima closer
- Also in this case, in principle, the three profile could become identical if the simulations were run for a sufficient time. However, we will always have that the first simulation is more accurate in reproducing the shape of the first minimum, since it spends more time there. The third simulation will always be more accurate in the region with the largest phi values (>-1)
# we now concatenate the trajectories:
subprocess.run("gmx trjcat -cat -f traj_comp_unbiased.xtc traj_comp_biased1.xtc traj_comp_biased2.xtc -o traj_comp_cat.xtc",shell=True)
# then import the wham tool
import wham
print(wham.__file__)
# then construct a plumed input file to compute the bias potentials for
# the three potential energy functions along the entire trajectory
with open("plumed_wham.dat","w") as f:
print("""
# vim:ft=plumed
MOLINFO STRUCTURE=reference.pdb
phi: TORSION ATOMS=@phi-2
psi: TORSION ATOMS=@psi-2
# here we list the bias potentials that we used in the three simulations we want to combine
b0: CUSTOM ARG=phi FUNC=0.0 PERIODIC=NO
b1: CUSTOM ARG=phi FUNC=-10*sin(x+2) PERIODIC=NO
b2: CUSTOM ARG=phi FUNC=-20*sin(x+2) PERIODIC=NO
PRINT ARG=phi,psi,b0,b1,b2 FILE=biases.dat
""",file=f)
subprocess.run("plumed driver --plumed plumed_wham.dat --ixtc traj_comp_cat.xtc",shell=True)
bias=plumed.read_as_pandas("biases.dat")
bias
kBT=300*8.314462618*0.001
w=wham.wham(np.stack((bias.b0,bias.b1,bias.b2)).T,T=kBT)
w
# here we check how the weight depends on the biased CV
plt.plot(bias.phi,w["logW"],".")
# as a function of psi instead we don't see any feature
plt.plot(bias.psi,w["logW"],".")
# We store the logarithm of the weights as an additional column in the bias dataframe
bias["logweights"]=w["logW"]
bias
plumed.write_pandas(bias,"bias_wham.dat")
with open("plumed_wham.dat","w") as f:
print("""
# vim:ft=plumed
phi: READ FILE=bias_wham.dat VALUES=phi
psi: READ FILE=bias_wham.dat VALUES=psi
lw: READ FILE=bias_wham.dat VALUES=logweights
# use the command below to compute the histogram of phi
# we use a smooth kernel to produce a nicer graph here
hhphi: HISTOGRAM ARG=phi GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05
ffphi: CONVERT_TO_FES GRID=hhphi # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffphi FILE=fes_phi_cat.dat
hhpsi: HISTOGRAM ARG=psi GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05
ffpsi: CONVERT_TO_FES GRID=hhpsi # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffpsi FILE=fes_psi_cat.dat
# we use a smooth kernel to produce a nicer graph here
hhphir: HISTOGRAM ARG=phi GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05 LOGWEIGHTS=lw
ffphir: CONVERT_TO_FES GRID=hhphir # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffphir FILE=fes_phi_catr.dat
hhpsir: HISTOGRAM ARG=psi GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05 LOGWEIGHTS=lw
ffpsir: CONVERT_TO_FES GRID=hhpsir # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffpsir FILE=fes_psi_catr.dat
""",file=f)
kb=0.008314462618
T=300
subprocess.run("plumed driver --noatoms --plumed plumed_wham.dat --kt {}".format(kb*T),shell=True)
colvar=plumed.read_as_pandas("bias_wham.dat")
plt.plot(colvar.time,colvar.phi,"x",label="phi")
plt.plot(colvar.time,colvar.psi,"x",label="psi")
plt.xlabel("time")
plt.ylabel("$\phi$")
plt.legend()
plt.show()
plt.plot(colvar.phi,colvar.psi,"x")
plt.xlabel("$\phi$")
plt.ylabel("$\psi$")
plt.xlim((-np.pi,np.pi))
plt.ylim((-np.pi,np.pi))
plt.show()
fes_phi=plumed.read_as_pandas("fes_phi.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_phi.phi,fes_phi.ffphi,label="original")
fes_phib=plumed.read_as_pandas("fes_phi_cat.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_phib.phi,fes_phib.ffphi,label="biased")
fes_phir=plumed.read_as_pandas("fes_phi_catr.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_phir.phi,fes_phir.ffphir,label="reweighted")
plt.legend()
plt.xlim((-np.pi,np.pi))
plt.xlabel("$\phi$")
plt.ylabel("$F(\phi)$")
plt.show()
fes_psi=plumed.read_as_pandas("fes_psi.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_psi.psi,fes_psi.ffpsi,label="original")
fes_psib=plumed.read_as_pandas("fes_psi_cat.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_psib.psi,fes_psib.ffpsi,label="biased")
fes_psir=plumed.read_as_pandas("fes_psi_catr.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_psir.psi,fes_psir.ffpsir,label="reweighted")
plt.legend()
plt.xlim((-np.pi,np.pi))
plt.xlabel("$\psi$")
plt.ylabel("$F(\psi)$")
plt.show()
Comments:
- The concatenated trajectory has a clearly visible behavior as a function of time. Earlier times sample both minima, later times mostly sample the higher phi minimum
- The correction (see above) makes sure that the high phi samples are weighted less, so as to result in a correct free-energy profile
Exercise 4¶
We will now use the very common multiple-windows protocol. We first generate the windows centers and the corresponding plumed input files
at=np.linspace(-np.pi,np.pi,33)[:-1]
print(at)
for i in range(32):
with open("plumed_" + str(i) + ".dat","w") as f:
print("""
# vim:ft=plumed
MOLINFO STRUCTURE=reference.pdb
phi: TORSION ATOMS=@phi-2
psi: TORSION ATOMS=@psi-2
bb: RESTRAINT ARG=phi KAPPA=200.0 AT={}
PRINT ARG=phi,psi,bb.bias FILE=colvar_multi_{}.dat STRIDE=100
""".format(at[i],i),file=f)
# This is to run multiple simulations concurrently.
# Tune max_workers depending on how many processors you have.
# We here use deffnm so as to store separate log files as well, though it is not really necessary
import concurrent.futures
clean()
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
# technical note: each "submission" involves a separate "thread"
# however, the thread is then using subprocess.run to start a new "process"
# thus, the resulting simulations are run an truly separate processes
for i in range(32):
print("submitting",i)
executor.submit(subprocess.run,"gmx mdrun -deffnm run{} -plumed plumed_{}.dat -s topolA.tpr -nsteps 200000 -x traj_comp_{}.xtc".format(i,i,i),shell=True)
# now concatenate the trajectories:
subprocess.run("gmx_mpi trjcat -cat -f traj_comp_[0-9].xtc traj_comp_[0-9][0-9].xtc -o traj_multi_cat.xtc",shell=True)
# and analyze all of them.
# we are here using the same input files we used to run the simulations
# by using the --trajectory-stride option we really mimick what happened in the biased simulations
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
for i in range(32):
executor.submit(subprocess.run,"plumed driver --plumed plumed_{}.dat --ixtc traj_multi_cat.xtc --trajectory-stride 100".format(i),shell=True)
col=[]
for i in range(32):
col.append(plumed.read_as_pandas("colvar_multi_" + str(i)+".dat"))
# notice that this is the concatenation of 32 trajectories with 2001 frames each
plt.plot(col[i].phi[2001*i:2001*(i+1)],col[i].psi[2001*i:2001*(i+1)],"x")
plt.xlabel("$\phi$")
plt.ylabel("$\psi$")
plt.show()
# in this graph you can appreciate which region was sampled by each simulation
bias=np.zeros((len(col[0]["bb.bias"]),32))
for i in range(32):
bias[:,i]=col[i]["bb.bias"][-len(bias):]
w=wham.wham(bias,T=kBT)
plt.plot(w["logW"])
plt.show()
# we now produce a file to be read by plumed
colvar=col[0]
colvar["logweights"]=w["logW"]
plumed.write_pandas(colvar,"bias_multi.dat")
subprocess.run("plumed driver --noatoms --plumed plumed_multi.dat --kt {}".format(kb*T),shell=True)
colvar=plumed.read_as_pandas("bias_multi.dat")
plt.plot(colvar.time,colvar.phi,"x",label="phi")
plt.plot(colvar.time,colvar.psi,"x",label="psi")
plt.xlabel("time")
plt.ylabel("$\phi$")
plt.legend()
plt.show()
plt.plot(colvar.phi,colvar.psi,"x")
plt.xlabel("$\phi$")
plt.ylabel("$\psi$")
plt.xlim((-np.pi,np.pi))
plt.ylim((-np.pi,np.pi))
plt.show()
fes_phi=plumed.read_as_pandas("fes_phi.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_phi.phi,fes_phi.ffphi,label="original")
fes_phib=plumed.read_as_pandas("fes_phi_cat.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_phib.phi,fes_phib.ffphi,label="biased")
fes_phir=plumed.read_as_pandas("fes_phi_catr.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_phir.phi,fes_phir.ffphir,label="reweighted")
plt.legend()
plt.xlim((-np.pi,np.pi))
plt.xlabel("$\phi$")
plt.ylabel("$F(\phi)$")
plt.show()
fes_psi=plumed.read_as_pandas("fes_psi.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_psi.psi,fes_psi.ffpsi,label="original")
fes_psib=plumed.read_as_pandas("fes_psi_cat.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_psib.psi,fes_psib.ffpsi,label="biased")
fes_psir=plumed.read_as_pandas("fes_psi_catr.dat").replace([np.inf, -np.inf], np.nan).dropna()
plt.plot(fes_psir.psi,fes_psir.ffpsir,label="reweighted")
plt.legend()
plt.xlim((-np.pi,np.pi))
plt.xlabel("$\psi$")
plt.ylabel("$F(\psi)$")
plt.show()
Comments:
- The profile along phi now includes another minimum (phi~1). It was only possible to reach it thanks to the restraints.
- Variable psi is not able to distingush this third minimum from the previous ones. The free energy wrt psi can be anyway computed by reweighting
Exercise 5¶
We will now compute some additional observables and the associated statistical errors.
# number of blocks
NB=10
# we reshape the bias so that it appears as NB times frames of each traj times number of biases
bb=bias.reshape((32,-1,32))[:,-2000:,:].reshape((32,NB,2000//NB,32))
# we reshape the trajectory so that it appears as NB times frames of each traj times number of biases
cc=np.array(col[0].phi).reshape((32,-1))[:,-2000:].reshape((32,NB,2000//NB))
# we first analyse the complete trajectory:
tr=cc.flatten()
is_in_B=np.int_(np.logical_and(tr>0,tr<2))
is_in_A=np.int_(tr<0)
w0=wham.wham(bb.reshape((-1,32)),T=kBT)
print("population:",np.average(is_in_B,weights=np.exp(w0["logW"])))
print("Delta F:",-kBT*np.log(
np.average(is_in_B,weights=np.exp(w0["logW"]))/np.average(is_in_A,weights=np.exp(w0["logW"]))
)
)
sin_A=np.average(np.sin(tr),weights=np.exp(w0["logW"])*is_in_A)
cos_A=np.average(np.cos(tr),weights=np.exp(w0["logW"])*is_in_A)
sin_B=np.average(np.sin(tr),weights=np.exp(w0["logW"])*is_in_B)
cos_B=np.average(np.cos(tr),weights=np.exp(w0["logW"])*is_in_B)
print("<phi>_A:",np.arctan2(sin_A,cos_A))
print("<phi>_B:",np.arctan2(sin_B,cos_B))
pop=[]
deltaF=[]
phi_A=[]
phi_B=[]
for i in range(200):
# we then analyze the bootstrapped trajectories
print(i)
c=np.random.choice(NB,NB)
w=wham.wham(bb[:,c,:,:].reshape((-1,32)),T=kBT)
tr=cc[:,c,:].flatten()
is_in_B=np.int_(np.logical_and(tr>0,tr<2))
sin_A=np.average(np.sin(tr),weights=np.exp(w["logW"])*is_in_A)
cos_A=np.average(np.cos(tr),weights=np.exp(w["logW"])*is_in_A)
sin_B=np.average(np.sin(tr),weights=np.exp(w["logW"])*is_in_B)
cos_B=np.average(np.cos(tr),weights=np.exp(w["logW"])*is_in_B)
pop.append(np.average(is_in_B,weights=np.exp(w["logW"])))
deltaF.append(-kBT*np.log(
np.average(is_in_B,weights=np.exp(w["logW"]))/np.average(is_in_A,weights=np.exp(w["logW"]))
))
phi_A.append(np.arctan2(sin_A,cos_A))
phi_B.append(np.arctan2(sin_B,cos_B))
print("error population:",np.std(pop))
print("error Delta F:",np.std(deltaF))
print("error <phi>_A:",np.std(phi_A))
print("error <phi>_B:",np.std(phi_B))
Comments:
- We are here analyzing the full trajectories. It is common practice to discard the initial part of the trajectory.
- Be careful when computing average angles! In this case periodicity is fine (by chance), but in general the approach of using circular averages (average sin and cos followed by arctan2) is much safer (https://en.wikipedia.org/wiki/Mean_of_circular_quantities)
- Bootstrap can be slow if you re-perform wham every time. The provided wham script could be optimized e.g.:
- Starting from the w0 logZ instead of starting from scratch at every iteration
- Converting the self-consistent procedure to a minimization (see Tan, Gallicchio, Lapelosa, and Levy et al JCTC 2012)
- Computing log(PA/PB) might lead to infinities when doing bootstrap. A more robust alternative is to use Bayesian bootstrap. See e.g. Hub et al, JCTC 2010.
Exercise 6¶
We will now repeat exercise 4 but using a starting from the other minimum.
at=np.linspace(-np.pi,np.pi,33)[:-1]
print(at)
for i in range(32):
with open("plumed_" + str(i) + ".dat","w") as f:
print("""
# vim:ft=plumed
MOLINFO STRUCTURE=reference.pdb
phi: TORSION ATOMS=@phi-2
psi: TORSION ATOMS=@psi-2
bb: RESTRAINT ARG=phi KAPPA=200.0 AT={}
PRINT ARG=phi,psi,bb.bias FILE=colvar_multi_B_{}.dat STRIDE=100
""".format(at[i],i),file=f)
# This is to run multiple simulations concurrently.
# Tune max_workers depending on how many processors you have.
# We here use deffnm so as to store separate log files as well, though it is not really necessary
import concurrent.futures
clean()
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
# technical note: each "submission" involves a separate "thread"
# however, the thread is then using subprocess.run to start a new "process"
# thus, the resulting simulations are run an truly separate processes
for i in range(32):
print("submitting",i)
executor.submit(subprocess.run,"gmx mdrun -deffnm run{} -plumed plumed_{}.dat -s topolB.tpr -nsteps 200000 -x traj_comp_B_{}.xtc".format(i,i,i),shell=True)
# now concatenate the trajectories:
subprocess.run("gmx_mpi trjcat -cat -f traj_comp_B_[0-9].xtc traj_comp_B_[0-9][0-9].xtc -o traj_multi_B_cat.xtc",shell=True)
# and analyze all of them.
# we are here using the same input files we used to run the simulations
# by using the --trajectory-stride option we really mimick what happened in the biased simulations
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
for i in range(32):
executor.submit(subprocess.run,"plumed driver --plumed plumed_{}.dat --ixtc traj_multi_B_cat.xtc --trajectory-stride 100".format(i),shell=True)
col=[]
for i in range(32):
col.append(plumed.read_as_pandas("colvar_multi_B_" + str(i)+".dat"))
# notice that this is the concatenation of 32 trajectories with 2001 frames each
plt.plot(col[i].phi[2001*i:2001*(i+1)],col[i].psi[2001*i:2001*(i+1)],"x")
plt.xlabel("$\phi$")
plt.ylabel("$\psi$")
plt.show()
# in this graph you can appreciate which region was sampled by each simulation
bias=np.zeros((len(col[0]["bb.bias"]),32))
for i in range(32):
bias[:,i]=col[i]["bb.bias"][-len(bias):]
w=wham.wham(bias,T=kBT)
plt.plot(w["logW"])
plt.show()
colvar=col[0]
colvar["logweights"]=w["logW"]
plumed.write_pandas(colvar,"bias_multi_B.dat")
# and analyze it as we did before
with open("plumed_multi_B.dat","w") as f:
print("""
# vim:ft=plumed
phi: READ FILE=bias_multi_B.dat VALUES=phi IGNORE_TIME
psi: READ FILE=bias_multi_B.dat VALUES=psi IGNORE_TIME
lw: READ FILE=bias_multi_B.dat VALUES=logweights IGNORE_TIME
# use the command below to compute the histogram of phi
# we use a smooth kernel to produce a nicer graph here
hhphi: HISTOGRAM ARG=phi GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05
ffphi: CONVERT_TO_FES GRID=hhphi # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffphi FILE=fes_phi_B_cat.dat
hhpsi: HISTOGRAM ARG=psi GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05
ffpsi: CONVERT_TO_FES GRID=hhpsi # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffpsi FILE=fes_psi_B_cat.dat
# we use a smooth kernel to produce a nicer graph here
hhphir: HISTOGRAM ARG=phi GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05 LOGWEIGHTS=lw
ffphir: CONVERT_TO_FES GRID=hhphir # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffphir FILE=fes_phi_B_catr.dat
hhpsir: HISTOGRAM ARG=psi GRID_MIN=-pi GRID_MAX=pi GRID_BIN=600 BANDWIDTH=0.05 LOGWEIGHTS=lw
ffpsir: CONVERT_TO_FES GRID=hhpsir # no need to set TEMP here, PLUMED will obtain it from GROMACS
DUMPGRID GRID=ffpsir FILE=fes_psi_B_catr.dat
""",file=f)
subprocess.run("plumed driver --noatoms --plumed plumed_multi_B.dat --kt {}".format(kb*T),shell=True)
Comments:
- The distributions are very similar and in A and B simulations. Indeed, in both cases we forced them to be as flat as possible
- The reweighted profiles are not identical!
- In the region at negative phi they are undistinguishable. Indeed, the transitions between the two minima are fast, and there is no reason to expect a different relative stability
- The difference between the minimum at negative phi and at positive phi is different. In particular, if starting in B, the minimum in B seems more stable. This is a general rule (based on experience of who is writing...)
- The difference between two different initialization protocols might be larger than the statistical error estimated by bootstrap! The difference actually would probably be smaller if one were discarding the initial part of the trajectory.
Extra checks¶
We here re-perfrom some of the analysis discarding the initial part of the trajectory. We first define a function that prints the free-energy difference between the two minima only keeping a given number of frames.
def analyze_keeping_last(keep_last):
col=[]
for i in range(32):
col.append(plumed.read_as_pandas("colvar_multi_" + str(i)+".dat"))
bias=np.zeros((len(col[0]["bb.bias"]),32))
for i in range(32):
bias[:,i]=col[i]["bb.bias"][:]
tr=col[i]["phi"]
# discard initial part of each simulation
bias=bias.reshape((32,-1,32))[:,-keep_last:,:].reshape((-1,32))
tr=np.array(tr).reshape((32,-1))[:,-keep_last:].flatten()
print(bias.shape,tr.shape)
w=wham.wham(bias,T=kBT)
print(-kBT*np.log(np.sum(np.exp(w["logW"])*(np.logical_and(tr>0,tr<2)))/np.sum(np.exp(w["logW"])*(tr<0.0))))
col=[]
for i in range(32):
col.append(plumed.read_as_pandas("colvar_multi_B_" + str(i)+".dat"))
bias=np.zeros((len(col[0]["bb.bias"]),32))
for i in range(32):
bias[:,i]=col[i]["bb.bias"][:]
tr=col[i]["phi"]
# discard initial part of each simulation
bias=bias.reshape((32,-1,32))[:,-keep_last:,:].reshape((-1,32))
tr=np.array(tr).reshape((32,-1))[:,-keep_last:].flatten()
print(bias.shape,tr.shape)
w=wham.wham(bias,T=kBT)
print(-kBT*np.log(np.sum(np.exp(w["logW"])*(np.logical_and(tr>0,tr<2)))/np.sum(np.exp(w["logW"])*(tr<0.0))))
analyze_keeping_last(2000)
analyze_keeping_last(1000)
As you can see, results are getting closer. However, the difference between the two estimates of Delta F is still quite large.