Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2014-2019 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "core/ActionWithValue.h"
23 : #include "core/ActionAtomistic.h"
24 : #include "core/ActionWithArguments.h"
25 : #include "reference/MultiReferenceBase.h"
26 : #include "reference/MetricRegister.h"
27 : #include "core/ActionRegister.h"
28 : #include "core/PlumedMain.h"
29 : #include "reference/Direction.h"
30 : #include "tools/Pbc.h"
31 :
32 : //+PLUMEDOC COLVAR PCAVARS
33 : /*
34 : Projection on principal component eigenvectors or other high dimensional linear subspace
35 :
36 : The collective variables described in \ref dists allow one to calculate the distance between the
37 : instaneous structure adopted by the system and some high-dimensional, reference configuration. The
38 : problem with doing this is that, as one gets further and further from the reference configuration, the
39 : distance from it becomes a progressively poorer and poorer collective variable. This happens because
40 : the ``number" of structures at a distance \f$d\f$ from a reference configuration is proportional to \f$d^N\f$ in
41 : an \f$N\f$ dimensional space. Consequently, when \f$d\f$ is small the distance from the reference configuration
42 : may well be a good collective variable. However, when \f$d\f$ is large it is unlikely that the distance from the reference
43 : structure is a good CV. When the distance is large there will almost certainly be markedly different
44 : configuration that have the same CV value and hence barriers in transverse degrees of
45 : freedom.
46 :
47 : For these reasons dimensionality reduction is often employed so a projection \f$\mathbf{s}\f$ of a high-dimensional configuration
48 : \f$\mathbf{X}\f$ in a lower dimensionality space using a function:
49 :
50 : \f[
51 : \mathbf{s} = F(\mathbf{X}-\mathbf{X}^{ref})
52 : \f]
53 :
54 : where here we have introduced some high-dimensional reference configuration \f$\mathbf{X}^{ref}\f$. By far the simplest way to
55 : do this is to use some linear operator for \f$F\f$. That is to say we find a low-dimensional projection
56 : by rotating the basis vectors using some linear algebra:
57 :
58 : \f[
59 : \mathbf{s}_i = \sum_k A_{ik} ( X_{k} - X_{k}^{ref} )
60 : \f]
61 :
62 : Here \f$A\f$ is a \f$d\f$ by \f$D\f$ matrix where \f$D\f$ is the dimensionality of the high dimensional space and \f$d\f$ is
63 : the dimensionality of the lower dimensional subspace. In plumed when this kind of projection you can use the majority
64 : of the metrics detailed on \ref dists to calculate the displacement, \f$\mathbf{X}-\mathbf{X}^{ref}\f$, from the reference configuration.
65 : The matrix \f$A\f$ can be found by various means including principal component analysis and normal mode analysis. In both these methods the
66 : rows of \f$A\f$ would be the principle eigenvectors of a square matrix. For PCA the covariance while for normal modes the Hessian.
67 :
68 : \bug It is not possible to use the \ref DRMSD metric with this variable. You can get around this by listing the set of distances you wish to calculate for your DRMSD in the plumed file explicitally and using the EUCLIDEAN metric. MAHALONOBIS and NORM-EUCLIDEAN also do not work with this variable but using these options makes little sense when projecting on a linear subspace.
69 :
70 : \par Examples
71 :
72 : The following input calculates a projection on a linear subspace where the displacements
73 : from the reference configuration are calculated using the OPTIMAL metric. Consequently,
74 : both translation of the center of mass of the atoms and rotation of the reference
75 : frame are removed from these displacements. The matrix \f$A\f$ and the reference
76 : configuration \f$R^{ref}\f$ are specified in the pdb input file reference.pdb and the
77 : value of all projections (and the residual) are output to a file called colvar2.
78 :
79 : \plumedfile
80 : PCAVARS REFERENCE=reference.pdb TYPE=OPTIMAL LABEL=pca2
81 : PRINT ARG=pca2.* FILE=colvar2
82 : \endplumedfile
83 :
84 : The reference configurations can be specified using a pdb file. The first configuration that you provide is the reference configuration,
85 : which is refered to in the above as \f$X^{ref}\f$ subsequent configurations give the directions of row vectors that are contained in
86 : the matrix \f$A\f$ above. These directions can be specified by specifying a second configuration - in this case a vector will
87 : be constructed by calculating the displacement of this second configuration from the reference configuration. A pdb input prepared
88 : in this way would look as follows:
89 :
90 : \verbatim
91 : ATOM 2 CH3 ACE 1 12.932 -14.718 -6.016 1.00 1.00
92 : ATOM 5 C ACE 1 21.312 -9.928 -5.946 1.00 1.00
93 : ATOM 9 CA ALA 2 19.462 -11.088 -8.986 1.00 1.00
94 : ATOM 13 HB2 ALA 2 21.112 -10.688 -12.476 1.00 1.00
95 : ATOM 15 C ALA 2 19.422 7.978 -14.536 1.00 1.00
96 : ATOM 20 HH31 NME 3 20.122 -9.928 -17.746 1.00 1.00
97 : ATOM 21 HH32 NME 3 18.572 -13.148 -16.346 1.00 1.00
98 : END
99 : ATOM 2 CH3 ACE 1 13.932 -14.718 -6.016 1.00 1.00
100 : ATOM 5 C ACE 1 20.312 -9.928 -5.946 1.00 1.00
101 : ATOM 9 CA ALA 2 18.462 -11.088 -8.986 1.00 1.00
102 : ATOM 13 HB2 ALA 2 20.112 -11.688 -12.476 1.00 1.00
103 : ATOM 15 C ALA 2 19.422 7.978 -12.536 1.00 1.00
104 : ATOM 20 HH31 NME 3 20.122 -9.928 -17.746 1.00 1.00
105 : ATOM 21 HH32 NME 3 18.572 -13.148 -16.346 1.00 1.00
106 : END
107 : \endverbatim
108 :
109 : Alternatively, the second configuration can specify the components of \f$A\f$ explicitally. In this case you need to include the
110 : keyword TYPE=DIRECTION in the remarks to the pdb as shown below.
111 :
112 : \verbatim
113 : ATOM 2 CH3 ACE 1 12.932 -14.718 -6.016 1.00 1.00
114 : ATOM 5 C ACE 1 21.312 -9.928 -5.946 1.00 1.00
115 : ATOM 9 CA ALA 2 19.462 -11.088 -8.986 1.00 1.00
116 : ATOM 13 HB2 ALA 2 21.112 -10.688 -12.476 1.00 1.00
117 : ATOM 15 C ALA 2 19.422 7.978 -14.536 1.00 1.00
118 : ATOM 20 HH31 NME 3 20.122 -9.928 -17.746 1.00 1.00
119 : ATOM 21 HH32 NME 3 18.572 -13.148 -16.346 1.00 1.00
120 : END
121 : REMARK TYPE=DIRECTION
122 : ATOM 2 CH3 ACE 1 0.1414 0.3334 -0.0302 1.00 0.00
123 : ATOM 5 C ACE 1 0.0893 -0.1095 -0.1434 1.00 0.00
124 : ATOM 9 CA ALA 2 0.0207 -0.321 0.0321 1.00 0.00
125 : ATOM 13 HB2 ALA 2 0.0317 -0.6085 0.0783 1.00 0.00
126 : ATOM 15 C ALA 2 0.1282 -0.4792 0.0797 1.00 0.00
127 : ATOM 20 HH31 NME 3 0.0053 -0.465 0.0309 1.00 0.00
128 : ATOM 21 HH32 NME 3 -0.1019 -0.4261 -0.0082 1.00 0.00
129 : END
130 : \endverbatim
131 :
132 : If your metric involves arguments the labels of these arguments in your plumed input file should be specified in the REMARKS
133 : for each of the frames of your path. An input file in this case might look like this:
134 :
135 : \verbatim
136 : DESCRIPTION: a pca eigenvector specified using the start point and direction in the HD space.
137 : REMARK WEIGHT=1.0
138 : REMARK ARG=d1,d2
139 : REMARK d1=1.0 d2=1.0
140 : END
141 : REMARK TYPE=DIRECTION
142 : REMARK ARG=d1,d2
143 : REMARK d1=0.1 d2=0.25
144 : END
145 : \endverbatim
146 :
147 : Here we are working with the EUCLIDEAN metric and notice that we have specified the components of \f$A\f$ using DIRECTION.
148 : Consequently, the values of d1 and d2 in the second frame above do not specify a particular coordinate in the high-dimensional
149 : space as in they do in the first frame. Instead these values are the coefficients that can be used to construct a linear combination of d1 and d2.
150 : If we wanted to specify the direction in this metric using the start and end point of the vector we would write:
151 :
152 : \verbatim
153 : DESCRIPTION: a pca eigenvector specified using the start and end point of a vector in the HD space.
154 : REMARK WEIGHT=1.0
155 : REMARK ARG=d1,d2
156 : REMARK d1=1.0 d2=1.0
157 : END
158 : REMARK ARG=d1,d2
159 : REMARK d1=1.1 d2=1.25
160 : END
161 : \endverbatim
162 :
163 : */
164 : //+ENDPLUMEDOC
165 :
166 : namespace PLMD {
167 : namespace mapping {
168 :
169 : class PCAVars :
170 : public ActionWithValue,
171 : public ActionAtomistic,
172 : public ActionWithArguments
173 : {
174 : private:
175 : /// The holders for the derivatives
176 : MultiValue myvals;
177 : ReferenceValuePack mypack;
178 : /// The position of the reference configuration (the one we align to)
179 : ReferenceConfiguration* myref;
180 : /// The eigenvectors we are interested in
181 : std::vector<Direction> directions;
182 : /// Stuff for applying forces
183 : std::vector<double> forces, forcesToApply;
184 : public:
185 : static void registerKeywords( Keywords& keys );
186 : explicit PCAVars(const ActionOptions&);
187 : ~PCAVars();
188 : unsigned getNumberOfDerivatives();
189 : void lockRequests();
190 : void unlockRequests();
191 : void calculateNumericalDerivatives( ActionWithValue* a );
192 : void calculate();
193 : void apply();
194 : };
195 :
196 4825 : PLUMED_REGISTER_ACTION(PCAVars,"PCAVARS")
197 :
198 5 : void PCAVars::registerKeywords( Keywords& keys ) {
199 5 : Action::registerKeywords( keys );
200 5 : ActionWithValue::registerKeywords( keys );
201 5 : ActionAtomistic::registerKeywords( keys );
202 5 : ActionWithArguments::registerKeywords( keys );
203 5 : componentsAreNotOptional(keys);
204 5 : keys.addOutputComponent("eig","default","the projections on each eigenvalue are stored on values labeled eig-1, eig-2, ...");
205 : keys.addOutputComponent("residual","default","the distance of the configuration from the linear subspace defined "
206 : "by the vectors, \\f$e_i\\f$, that are contained in the rows of \\f$A\\f$. In other words this is "
207 : "\\f$\\sqrt( r^2 - \\sum_i [\\mathbf{r}.\\mathbf{e_i}]^2)\\f$ where "
208 : "\\f$r\\f$ is the distance between the instantaneous position and the "
209 5 : "reference point.");
210 5 : keys.add("compulsory","REFERENCE","a pdb file containing the reference configuration and configurations that define the directions for each eigenvector");
211 5 : keys.add("compulsory","TYPE","OPTIMAL","The method we are using for alignment to the reference structure");
212 5 : }
213 :
214 4 : PCAVars::PCAVars(const ActionOptions& ao):
215 : Action(ao),
216 : ActionWithValue(ao),
217 : ActionAtomistic(ao),
218 : ActionWithArguments(ao),
219 : myvals(1,0),
220 4 : mypack(0,0,myvals)
221 : {
222 :
223 : // What type of distance are we calculating
224 4 : std::string mtype; parse("TYPE",mtype);
225 :
226 : // Open reference file
227 8 : std::string reference; parse("REFERENCE",reference);
228 4 : FILE* fp=fopen(reference.c_str(),"r");
229 4 : if(!fp) error("could not open reference file " + reference );
230 :
231 : // Read all reference configurations
232 8 : MultiReferenceBase myframes( "", false );
233 4 : bool do_read=true; unsigned nfram=0;
234 20 : while (do_read) {
235 16 : PDB mypdb;
236 : // Read the pdb file
237 16 : do_read=mypdb.readFromFilepointer(fp,plumed.getAtoms().usingNaturalUnits(),0.1/atoms.getUnits().getLength());
238 : // Fix argument names
239 16 : expandArgKeywordInPDB( mypdb );
240 16 : if(do_read) {
241 12 : if( nfram==0 ) {
242 4 : myref = metricRegister().create<ReferenceConfiguration>( mtype, mypdb );
243 4 : Direction* tdir = dynamic_cast<Direction*>( myref );
244 4 : if( tdir ) error("first frame should be reference configuration - not direction of vector");
245 4 : if( !myref->pcaIsEnabledForThisReference() ) error("can't do PCA with reference type " + mtype );
246 8 : std::vector<std::string> remarks( mypdb.getRemark() ); std::string rtype;
247 4 : bool found=Tools::parse( remarks, "TYPE", rtype );
248 4 : if(!found) { std::vector<std::string> newrem(1); newrem[0]="TYPE="+mtype; mypdb.addRemark(newrem); }
249 8 : myframes.readFrame( mypdb );
250 8 : } else myframes.readFrame( mypdb );
251 12 : nfram++;
252 : } else {
253 4 : break;
254 : }
255 12 : }
256 4 : fclose(fp);
257 :
258 4 : if( nfram<=1 ) error("no eigenvectors were specified");
259 4 : log.printf(" found %u eigenvectors in file %s \n",nfram-1,reference.c_str() );
260 :
261 : // Finish the setup of the mapping object
262 : // Get the arguments and atoms that are required
263 8 : std::vector<AtomNumber> atoms; std::vector<std::string> args;
264 4 : myframes.getAtomAndArgumentRequirements( atoms, args );
265 8 : requestAtoms( atoms ); std::vector<Value*> req_args;
266 4 : interpretArgumentList( args, req_args ); requestArguments( req_args );
267 :
268 : // Setup the derivative pack
269 4 : if( atoms.size()>0 ) myvals.resize( 1, args.size() + 3*atoms.size() + 9 );
270 0 : else myvals.resize( 1, args.size() );
271 4 : mypack.resize( args.size(), atoms.size() );
272 4 : for(unsigned i=0; i<atoms.size(); ++i) mypack.setAtomIndex( i, i );
273 : /// This sets up all the storage data required by PCA in the pack
274 4 : myframes.getFrame(0)->setupPCAStorage( mypack );
275 :
276 : // Retrieve the position of the first frame, as we use this for alignment
277 4 : myref->setNamesAndAtomNumbers( atoms, args );
278 : // Check there are no periodic arguments
279 4 : for(unsigned i=0; i<getNumberOfArguments(); ++i) {
280 0 : if( getPntrToArgument(i)->isPeriodic() ) error("cannot use periodic variables in pca projections");
281 : }
282 : // Work out if the user wants to normalise the input vector
283 4 : checkRead();
284 :
285 : // Resize the matrices that will hold our eivenvectors
286 12 : for(unsigned i=1; i<nfram; ++i) {
287 8 : directions.push_back( Direction(ReferenceConfigurationOptions("DIRECTION")));
288 8 : directions[i-1].setNamesAndAtomNumbers( atoms, args );
289 : }
290 :
291 : // Create fake periodic boundary condition (these would only be used for DRMSD which is not allowed)
292 : // Now calculate the eigenvectors
293 12 : for(unsigned i=1; i<nfram; ++i) {
294 8 : myframes.getFrame(i)->extractDisplacementVector( myref->getReferencePositions(), getArguments(), myref->getReferenceArguments(), true, directions[i-1] );
295 : // Create a component to store the output
296 8 : std::string num; Tools::convert( i, num );
297 8 : addComponentWithDerivatives("eig-"+num); componentIsNotPeriodic("eig-"+num);
298 8 : }
299 4 : addComponentWithDerivatives("residual"); componentIsNotPeriodic("residual");
300 :
301 : // Get appropriate number of derivatives
302 : unsigned nder;
303 4 : if( getNumberOfAtoms()>0 ) {
304 4 : nder = 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
305 : } else {
306 0 : nder = getNumberOfArguments();
307 : }
308 :
309 : // Resize all derivative arrays
310 4 : forces.resize( nder ); forcesToApply.resize( nder );
311 8 : for(unsigned i=0; i<getNumberOfComponents(); ++i) getPntrToComponent(i)->resizeDerivatives(nder);
312 4 : }
313 :
314 16 : PCAVars::~PCAVars() {
315 4 : delete myref;
316 12 : }
317 :
318 36 : unsigned PCAVars::getNumberOfDerivatives() {
319 36 : if( getNumberOfAtoms()>0 ) {
320 36 : return 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
321 : }
322 0 : return getNumberOfArguments();
323 : }
324 :
325 44 : void PCAVars::lockRequests() {
326 44 : ActionWithArguments::lockRequests();
327 44 : ActionAtomistic::lockRequests();
328 44 : }
329 :
330 44 : void PCAVars::unlockRequests() {
331 44 : ActionWithArguments::unlockRequests();
332 44 : ActionAtomistic::unlockRequests();
333 44 : }
334 :
335 44 : void PCAVars::calculate() {
336 : // Clear the reference value pack
337 44 : mypack.clear();
338 : // Calculate distance between instaneous configuration and reference
339 44 : double dist = myref->calculate( getPositions(), getPbc(), getArguments(), mypack, true );
340 :
341 : // Start accumulating residual by adding derivatives of distance
342 44 : Value* resid=getPntrToComponent( getNumberOfComponents()-1 ); unsigned nargs=getNumberOfArguments();
343 44 : for(unsigned j=0; j<getNumberOfArguments(); ++j) resid->addDerivative( j, mypack.getArgumentDerivative(j) );
344 352 : for(unsigned j=0; j<getNumberOfAtoms(); ++j) {
345 308 : Vector ader=mypack.getAtomDerivative( j );
346 308 : for(unsigned k=0; k<3; ++k) resid->addDerivative( nargs +3*j+k, ader[k] );
347 : }
348 : // Retrieve the values of all arguments
349 44 : std::vector<double> args( getNumberOfArguments() ); for(unsigned i=0; i<getNumberOfArguments(); ++i) args[i]=getArgument(i);
350 :
351 : // Now calculate projections on pca vectors
352 44 : Vector adif, ader; Tensor fvir, tvir;
353 132 : for(unsigned i=0; i<getNumberOfComponents()-1; ++i) { // One less component as we also have residual
354 88 : double proj=myref->projectDisplacementOnVector( directions[i], getArguments(), args, mypack );
355 : // And now accumulate derivatives
356 88 : Value* eid=getPntrToComponent(i);
357 88 : for(unsigned j=0; j<getNumberOfArguments(); ++j) eid->addDerivative( j, mypack.getArgumentDerivative(j) );
358 88 : if( getNumberOfAtoms()>0 ) {
359 88 : tvir.zero();
360 704 : for(unsigned j=0; j<getNumberOfAtoms(); ++j) {
361 616 : Vector myader=mypack.getAtomDerivative(j);
362 2464 : for(unsigned k=0; k<3; ++k) {
363 1848 : eid->addDerivative( nargs + 3*j+k, myader[k] );
364 1848 : resid->addDerivative( nargs + 3*j+k, -2*proj*myader[k] );
365 : }
366 616 : tvir += -1.0*Tensor( getPosition(j), myader );
367 : }
368 352 : for(unsigned j=0; j<3; ++j) {
369 264 : for(unsigned k=0; k<3; ++k) eid->addDerivative( nargs + 3*getNumberOfAtoms() + 3*j + k, tvir(j,k) );
370 : }
371 : }
372 88 : dist -= proj*proj; // Subtract square from total squared distance to get residual squared
373 : // Derivatives of residual
374 88 : for(unsigned j=0; j<getNumberOfArguments(); ++j) resid->addDerivative( j, -2*proj*eid->getDerivative(j) );
375 : // for(unsigned j=0;j<getNumberOfArguments();++j) resid->addDerivative( j, -2*proj*arg_eigv(i,j) );
376 : // And set final value
377 88 : getPntrToComponent(i)->set( proj );
378 : }
379 44 : dist=sqrt(dist);
380 44 : resid->set( dist );
381 :
382 : // Take square root of residual derivatives
383 44 : double prefactor = 0.5 / dist;
384 44 : for(unsigned j=0; j<getNumberOfArguments(); ++j) resid->setDerivative( j, prefactor*resid->getDerivative(j) );
385 352 : for(unsigned j=0; j<getNumberOfAtoms(); ++j) {
386 308 : for(unsigned k=0; k<3; ++k) resid->setDerivative( nargs + 3*j+k, prefactor*resid->getDerivative( nargs+3*j+k ) );
387 : }
388 :
389 : // And finally virial for residual
390 44 : if( getNumberOfAtoms()>0 ) {
391 44 : tvir.zero();
392 352 : for(unsigned j=0; j<getNumberOfAtoms(); ++j) {
393 308 : Vector ader; for(unsigned k=0; k<3; ++k) ader[k]=resid->getDerivative( nargs + 3*j+k );
394 308 : tvir += -1.0*Tensor( getPosition(j), ader );
395 : }
396 176 : for(unsigned j=0; j<3; ++j) {
397 132 : for(unsigned k=0; k<3; ++k) resid->addDerivative( nargs + 3*getNumberOfAtoms() + 3*j + k, tvir(j,k) );
398 : }
399 44 : }
400 :
401 44 : }
402 :
403 0 : void PCAVars::calculateNumericalDerivatives( ActionWithValue* a ) {
404 0 : if( getNumberOfArguments()>0 ) {
405 0 : ActionWithArguments::calculateNumericalDerivatives( a );
406 : }
407 0 : if( getNumberOfAtoms()>0 ) {
408 0 : Matrix<double> save_derivatives( getNumberOfComponents(), getNumberOfArguments() );
409 0 : for(unsigned j=0; j<getNumberOfComponents(); ++j) {
410 0 : for(unsigned i=0; i<getNumberOfArguments(); ++i) save_derivatives(j,i)=getPntrToComponent(j)->getDerivative(i);
411 : }
412 0 : calculateAtomicNumericalDerivatives( a, getNumberOfArguments() );
413 0 : for(unsigned j=0; j<getNumberOfComponents(); ++j) {
414 0 : for(unsigned i=0; i<getNumberOfArguments(); ++i) getPntrToComponent(j)->addDerivative( i, save_derivatives(j,i) );
415 0 : }
416 : }
417 0 : }
418 :
419 44 : void PCAVars::apply() {
420 :
421 44 : bool wasforced=false; forcesToApply.assign(forcesToApply.size(),0.0);
422 176 : for(unsigned i=0; i<getNumberOfComponents(); ++i) {
423 132 : if( getPntrToComponent(i)->applyForce( forces ) ) {
424 0 : wasforced=true;
425 0 : for(unsigned i=0; i<forces.size(); ++i) forcesToApply[i]+=forces[i];
426 : }
427 : }
428 44 : if( wasforced ) {
429 0 : addForcesOnArguments( forcesToApply );
430 0 : if( getNumberOfAtoms()>0 ) setForcesOnAtoms( forcesToApply, getNumberOfArguments() );
431 : }
432 :
433 44 : }
434 :
435 : }
436 4821 : }
|