Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2012-2019 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "KernelFunctions.h"
23 : #include "IFile.h"
24 : #include <iostream>
25 : #include <cmath>
26 :
27 : namespace PLMD {
28 :
29 : //+PLUMEDOC INTERNAL kernelfunctions
30 : /*
31 : Functions that are used to construct histograms
32 :
33 : Constructing histograms is something you learnt to do relatively early in life. You perform an experiment a number of times,
34 : count the number of times each result comes up and then draw a bar graph that describes how often each of the results came up.
35 : This only works when there are a finite number of possible results. If the result is a number between 0 and 1 the bar chart is
36 : less easy to draw as there are as many possible results as there are numbers between zero and one - an infinite number.
37 : To resolve this problem we replace probability, \f$P\f$ with probability density, \f$\pi\f$, and write the probability of getting
38 : a number between \f$a\f$ and \f$b\f$ as:
39 :
40 : \f[
41 : P = \int_{a}^b \textrm{d}x \pi(x)
42 : \f]
43 :
44 : To calculate probability densities from a set of results we use a process called kernel density estimation.
45 : Histograms are accumulated by adding up kernel functions, \f$K\f$, with finite spatial extent, that integrate to one.
46 : These functions are centered on each of the \f$n\f$-dimensional data points, \f$\mathbf{x}_i\f$. The overall effect of this
47 : is that each result we obtain in our experiments contributes to the probability density in a finite sized region of the space.
48 :
49 : Expressing all this mathematically in kernel density estimation we write the probability density as:
50 :
51 : \f[
52 : \pi(\mathbf{x}) = \sum_i K\left[ (\mathbf{x} - \mathbf{x}_i)^T \Sigma (\mathbf{x} - \mathbf{x}_i) \right]
53 : \f]
54 :
55 : where \f$\Sigma\f$ is an \f$n \times n\f$ matrix called the bandwidth that controls the spatial extent of
56 : the kernel. Whenever we accumulate a histogram (e.g. in \ref HISTOGRAM or in \ref METAD) we use this
57 : technique.
58 :
59 : There is thus some flexibility in the particular function we use for \f$K[\mathbf{r}]\f$ in the above.
60 : The following variants are available.
61 :
62 : <table align=center frame=void width=95%% cellpadding=5%%>
63 : <tr>
64 : <td> TYPE </td> <td> FUNCTION </td>
65 : </tr> <tr>
66 : <td> gaussian </td> <td> \f$f(r) = \frac{1}{(2 \pi)^{n} \sqrt{|\Sigma^{-1}|}} \exp\left(-0.5 r^2 \right)\f$ </td>
67 : </tr> <tr>
68 : <td> truncated-gaussian </td> <td> \f$f(r) = \frac{1}{(2 \pi)^{n} \sqrt{|\Sigma^{-1}|} \left(\frac{\mathrm{erf}(2.5) - \mathrm{erf}(-2.5)}{2}\right)^n} \exp\left(-0.5 r^2 \right)\f$ </td>
69 : </tr> <tr>
70 : <td> triangular </td> <td> \f$f(r) = \frac{3}{V} ( 1 - | r | )H(1-|r|) \f$ </td>
71 : </tr> <tr>
72 : <td> uniform </td> <td> \f$f(r) = \frac{1}{V}H(1-|r|)\f$ </td>
73 : </tr>
74 : </table>
75 :
76 : In the above \f$H(y)\f$ is a function that is equal to one when \f$y>0\f$ and zero when \f$y \le 0\f$. \f$n\f$ is
77 : the dimensionality of the vector \f$\mathbf{x}\f$ and \f$V\f$ is the volume of an ellipse in an \f$n\f$ dimensional
78 : space which is given by:
79 :
80 : \f{eqnarray*}{
81 : V &=& | \Sigma^{-1} | \frac{ \pi^{\frac{n}{2}} }{\left( \frac{n}{2} \right)! } \qquad \textrm{for even} \quad n \\
82 : V &=& | \Sigma^{-1} | \frac{ 2^{\frac{n+1}{2}} \pi^{\frac{n-1}{2}} }{ n!! } \qquad \textrm{for odd} \quad n
83 : \f}
84 :
85 : In \ref METAD the normalization constants are ignored so that the value of the function at \f$r=0\f$ is equal
86 : to one. In addition in \ref METAD we must be able to differentiate the bias in order to get forces. This limits
87 : the kernels we can use in this method. Notice also that Gaussian kernels should have infinite support. When used
88 : with grids, however, they are assumed to only be non-zero over a finite range. The difference between the
89 : truncated-gaussian and regular gaussian is that the truncated gaussian is scaled so that its integral over the grid
90 : is equal to one when it is normalised. The integral of a regular gaussian when it is evaluated on a grid will be
91 : slightly less than one because of the truncation of a function that should have infinite support.
92 : */
93 : //+ENDPLUMEDOC
94 :
95 12 : KernelFunctions::KernelFunctions( const std::string& input, const bool& normed ) {
96 12 : std::vector<std::string> data=Tools::getWords(input);
97 24 : std::string name=data[0];
98 12 : data.erase(data.begin());
99 :
100 24 : std::vector<double> at;
101 12 : bool foundc = Tools::parseVector(data,"CENTER",at);
102 12 : if(!foundc) plumed_merror("failed to find center keyword in definition of kernel");
103 24 : std::vector<double> sig;
104 12 : bool founds = Tools::parseVector(data,"SIGMA",sig);
105 12 : if(!founds) plumed_merror("failed to find sigma keyword in definition of kernel");
106 :
107 12 : bool multi=false; Tools::parseFlag(data,"MULTIVARIATE",multi);
108 12 : if( center.size()==1 && multi ) plumed_merror("one dimensional kernel cannot be multivariate");
109 12 : if( center.size()==1 && sig.size()!=1 ) plumed_merror("size mismatch between center size and sigma size");
110 12 : if( multi && center.size()>1 && sig.size()!=0.5*center.size()*(center.size()-1) ) plumed_merror("size mismatch between center size and sigma size");
111 12 : if( !multi && center.size()>1 && sig.size()!=center.size() ) plumed_merror("size mismatch between center size and sigma size");
112 :
113 : double h;
114 12 : bool foundh = Tools::parse(data,"HEIGHT",h);
115 12 : if( !foundh) h=1.0;
116 :
117 24 : setData( at, sig, name, multi, h, normed );
118 12 : }
119 :
// Convenience constructor: the kernel parameters are supplied directly
// rather than parsed from an input string; simply forwards to setData().
KernelFunctions::KernelFunctions( const std::vector<double>& at, const std::vector<double>& sig, const std::string& type, const bool multivariate, const double& w, const bool norm ) {
  setData( at, sig, type, multivariate, w, norm );
}
123 :
124 30083 : void KernelFunctions::setData( const std::vector<double>& at, const std::vector<double>& sig, const std::string& type, const bool multivariate, const double& w, const bool norm ) {
125 :
126 30083 : center.resize( at.size() ); for(unsigned i=0; i<at.size(); ++i) center[i]=at[i];
127 30090 : width.resize( sig.size() ); for(unsigned i=0; i<sig.size(); ++i) width[i]=sig[i];
128 30089 : diagonal=false;
129 30089 : if (multivariate==false ) diagonal=true;
130 :
131 : // Setup the kernel type
132 30089 : if(type=="GAUSSIAN" || type=="gaussian" || type=="TRUNCATED-GAUSSIAN" || type=="truncated-gaussian" ) {
133 30062 : ktype=gaussian;
134 4 : } else if(type=="UNIFORM" || type=="uniform") {
135 0 : ktype=uniform;
136 4 : } else if(type=="TRIANGULAR" || type=="triangular") {
137 4 : ktype=triangular;
138 : } else {
139 0 : plumed_merror(type+" is an invalid kernel type\n");
140 : }
141 :
142 30066 : if( norm ) {
143 24500 : double det; unsigned ncv=ndim();
144 24517 : if(diagonal) {
145 24517 : det=1; for(unsigned i=0; i<width.size(); ++i) det*=width[i]*width[i];
146 : } else {
147 0 : Matrix<double> mymatrix( getMatrix() ), myinv( ncv, ncv );
148 0 : Invert(mymatrix,myinv); double logd;
149 0 : logdet( myinv, logd );
150 0 : det=std::exp(logd);
151 : }
152 : double volume;
153 24517 : if( ktype==gaussian ) {
154 24517 : if( type=="GAUSSIAN" || type=="gaussian" ) volume=pow( 2*pi, 0.5*ncv ) * pow( det, 0.5 );
155 : else {
156 : // This makes it so the gaussian integrates to one over the range over which it has support
157 0 : const double DP2CUTOFF=sqrt(6.25);
158 0 : volume=pow( 2*pi, 0.5*ncv ) * pow( det, 0.5 ) * pow( 0.5 * ( erf(DP2CUTOFF) - erf(-DP2CUTOFF) ), ncv);
159 : }
160 0 : } else if( ktype==uniform || ktype==triangular ) {
161 0 : if( ncv%2==1 ) {
162 0 : double dfact=1;
163 0 : for(unsigned i=1; i<ncv; i+=2) dfact*=static_cast<double>(i);
164 0 : volume=( pow( pi, (ncv-1)/2 ) ) * ( pow( 2., (ncv+1)/2 ) ) / dfact;
165 : } else {
166 0 : double fact=1.;
167 0 : for(unsigned i=1; i<ncv/2; ++i) fact*=static_cast<double>(i);
168 0 : volume=pow( pi,ncv/2 ) / fact;
169 : }
170 0 : if(ktype==uniform) volume*=det;
171 0 : else if(ktype==triangular) volume*=det / 3.;
172 : } else {
173 0 : plumed_merror("not a valid kernel type");
174 : }
175 24471 : height=w / volume;
176 : } else {
177 5566 : height=w;
178 : }
179 30037 : }
180 :
181 10131 : double KernelFunctions::getCutoff( const double& width ) const {
182 10131 : const double DP2CUTOFF=6.25;
183 10131 : if( ktype==gaussian ) return sqrt(2.0*DP2CUTOFF)*width;
184 0 : else if(ktype==triangular ) return width;
185 0 : else if(ktype==uniform) return width;
186 0 : else plumed_merror("No valid kernel type");
187 : return 0.0;
188 : }
189 :
190 5078 : std::vector<double> KernelFunctions::getContinuousSupport( ) const {
191 5078 : unsigned ncv=ndim();
192 5078 : std::vector<double> support( ncv );
193 5078 : if(diagonal) {
194 710 : for(unsigned i=0; i<ncv; ++i) support[i]=getCutoff(width[i]);
195 : } else {
196 8736 : Matrix<double> mymatrix( getMatrix() ), myinv( ncv,ncv );
197 4368 : Invert(mymatrix,myinv);
198 8736 : Matrix<double> myautovec(ncv,ncv); std::vector<double> myautoval(ncv);
199 4368 : diagMat(myinv,myautoval,myautovec);
200 4368 : double maxautoval; maxautoval=0.;
201 : unsigned ind_maxautoval;
202 13104 : for (unsigned i=0; i<ncv; i++) {
203 8736 : if(myautoval[i]>maxautoval) {maxautoval=myautoval[i]; ind_maxautoval=i;}
204 : }
205 13104 : for(unsigned i=0; i<ncv; ++i) {
206 8736 : double extent=fabs(sqrt(maxautoval)*myautovec(i,ind_maxautoval));
207 8736 : support[i]=getCutoff( extent );
208 4368 : }
209 : }
210 5078 : return support;
211 : }
212 :
213 3405 : std::vector<unsigned> KernelFunctions::getSupport( const std::vector<double>& dx ) const {
214 3405 : plumed_assert( ndim()==dx.size() );
215 3405 : std::vector<unsigned> support( dx.size() );
216 6810 : std::vector<double> vv=getContinuousSupport( );
217 3405 : for(unsigned i=0; i<dx.size(); ++i) support[i]=static_cast<unsigned>(ceil( vv[i]/dx[i] ));
218 6810 : return support;
219 : }
220 :
221 14942064 : double KernelFunctions::evaluate( const std::vector<Value*>& pos, std::vector<double>& derivatives, bool usederiv, bool doInt, double lowI_, double uppI_) const {
222 : plumed_dbg_assert( pos.size()==ndim() && derivatives.size()==ndim() );
223 : #ifndef NDEBUG
224 : if( usederiv ) plumed_massert( ktype!=uniform, "step function can not be differentiated" );
225 : #endif
226 14942064 : if(doInt) {
227 : plumed_dbg_assert(center.size()==1);
228 0 : if(pos[0]->get()<lowI_) pos[0]->set(lowI_);
229 0 : if(pos[0]->get()>uppI_) pos[0]->set(uppI_);
230 : }
231 14942089 : double r2=0;
232 14942089 : if(diagonal) {
233 51682666 : for(unsigned i=0; i<ndim(); ++i) {
234 37707077 : derivatives[i]=-pos[i]->difference( center[i] ) / width[i];
235 37715808 : r2+=derivatives[i]*derivatives[i];
236 37715562 : derivatives[i] /= width[i];
237 : }
238 : } else {
239 970504 : Matrix<double> mymatrix( getMatrix() );
240 2911512 : for(unsigned i=0; i<mymatrix.nrows(); ++i) {
241 1941008 : double dp_i, dp_j; derivatives[i]=0;
242 1941008 : dp_i=-pos[i]->difference( center[i] );
243 5823024 : for(unsigned j=0; j<mymatrix.ncols(); ++j) {
244 3882016 : if(i==j) dp_j=dp_i;
245 1941008 : else dp_j=-pos[j]->difference( center[j] );
246 :
247 3882016 : derivatives[i]+=mymatrix(i,j)*dp_j;
248 3882016 : r2+=dp_i*dp_j*mymatrix(i,j);
249 : }
250 970504 : }
251 : }
252 : double kderiv, kval;
253 14942447 : if(ktype==gaussian) {
254 14907611 : kval=height*std::exp(-0.5*r2); kderiv=-kval;
255 : } else {
256 34836 : double r=sqrt(r2);
257 34836 : if(ktype==triangular) {
258 34836 : if( r<1.0 ) {
259 9753 : if(r==0) kderiv=0;
260 9753 : kderiv=-1; kval=height*( 1. - fabs(r) );
261 : } else {
262 25083 : kval=0.; kderiv=0.;
263 : }
264 0 : } else if(ktype==uniform) {
265 0 : kderiv=0.;
266 0 : if(r<1.0) kval=height;
267 0 : else kval=0;
268 : } else {
269 0 : plumed_merror("Not a valid kernel type");
270 : }
271 34836 : kderiv*=height / r ;
272 : }
273 14942447 : for(unsigned i=0; i<ndim(); ++i) derivatives[i]*=kderiv;
274 14942448 : if(doInt) {
275 0 : if((pos[0]->get() <= lowI_ || pos[0]->get() >= uppI_) && usederiv ) for(unsigned i=0; i<ndim(); ++i)derivatives[i]=0;
276 : }
277 14942448 : return kval;
278 : }
279 :
// Build a gaussian KernelFunctions object from the fields of the current
// line of a PLUMED file (e.g. a HILLS file).  valnames gives the names of
// the kernel arguments.  The caller takes ownership of the returned raw
// pointer.
KernelFunctions* KernelFunctions::read( IFile* ifile, const std::vector<std::string>& valnames ) {
  // "multivariate" tells us whether sigmas on file are one width per
  // argument ("false") or a triangular matrix ("true").
  std::string sss; ifile->scanField("multivariate",sss);
  std::vector<double> cc( valnames.size() ), sig;
  bool multivariate;
  if( sss=="false" ) {
    multivariate=false;
    sig.resize( valnames.size() );
    // Diagonal kernel: one center and one sigma field per argument.
    for(unsigned i=0; i<valnames.size(); ++i) {
      ifile->scanField(valnames[i],cc[i]);
      ifile->scanField("sigma_"+valnames[i],sig[i]);
    }
  } else if( sss=="true" ) {
    multivariate=true;
    unsigned ncv=valnames.size();
    // sig holds the upper triangle (diagonal included) of the ncv x ncv
    // bandwidth matrix: ncv*(ncv+1)/2 entries.
    sig.resize( (ncv*(ncv+1))/2 );
    Matrix<double> upper(ncv,ncv), lower(ncv,ncv);
    // The file stores a lower-triangular factor L; read it into `lower` and
    // mirror each entry into `upper` (= L^T).
    // NOTE(review): off-triangle entries are never written here — this
    // assumes Matrix<double> zero-initializes its elements; verify.
    for(unsigned i=0; i<ncv; ++i) {
      ifile->scanField(valnames[i],cc[i]);
      for(unsigned j=0; j<ncv-i; j++) { ifile->scanField("sigma_" +valnames[j+i] + "_" + valnames[j], lower(j+i,j) ); upper(j,j+i)=lower(j+i,j); }
    }
    // Rebuild the full matrix as L*L^T, invert it, and flatten the upper
    // triangle of the inverse row by row into sig.
    Matrix<double> mymult( ncv, ncv ), invmatrix(ncv,ncv);
    mult(lower,upper,mymult); Invert( mymult, invmatrix );
    unsigned k=0;
    for(unsigned i=0; i<ncv; i++) {
      for(unsigned j=i; j<ncv; j++) { sig[k]=invmatrix(i,j); k++; }
    }
  } else {
    plumed_merror("multivariate flag should equal true or false");
  }
  // Heights on file are already un-normalized, hence norm=false below.
  double h; ifile->scanField("height",h);
  return new KernelFunctions( cc, sig, "gaussian", multivariate,h, false);
}
312 :
313 4821 : }
|