Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2016-2019 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 :
23 : #include "bias/Bias.h"
24 : #include "bias/ActionRegister.h"
25 : #include "core/PlumedMain.h"
26 : #include "core/Atoms.h"
27 : #include "core/Value.h"
28 : #include "tools/File.h"
29 : #include "tools/OpenMP.h"
30 : #include "tools/Random.h"
31 : #include <cmath>
32 : #include <ctime>
33 : #include <numeric>
34 : using namespace std;
35 :
36 : #ifndef M_PI
37 : #define M_PI 3.14159265358979323846
38 : #endif
39 :
40 : namespace PLMD {
41 : namespace isdb {
42 :
43 : //+PLUMEDOC ISDB_BIAS METAINFERENCE
44 : /*
45 : Calculates the Metainference energy for a set of experimental data.
46 :
47 : Metainference \cite Bonomi:2016ip is a Bayesian framework
48 : to model heterogeneous systems by integrating prior information with noisy, ensemble-averaged data.
49 : Metainference models a system and quantifies the level of noise in the data by considering a set of replicas of the system.
50 :
51 : The calculated experimental data are given in input as ARG, while the reference experimental values
52 : can be provided either from fixed components of other actions using PARARG or as numbers using
53 : PARAMETERS. The default behavior is to average the data over the available replicas;
54 : if this is not wanted, the keyword NOENSEMBLE prevents the averaging.
55 :
56 : Metadynamics Metainference \cite Bonomi:2016ge, or more generally biased Metainference, requires the knowledge of
57 : the biasing potential in order to calculate the weighted average. In this case the value of the bias
58 : can be provided as the last argument of ARG together with the keyword REWEIGHT. To avoid the noise
59 : resulting from the instantaneous value of the bias, the weight of each replica can be averaged
60 : over a given time window using the keyword AVERAGING.
61 :
62 : The data can be averaged by using multiple replicas and weighted for a bias if present.
63 : The functional form of Metainference can be chosen among five variants, selected
64 : with NOISETYPE=GAUSS,MGAUSS,OUTLIERS,MOUTLIERS,GENERIC, which correspond to modelling the noise for
65 : the arguments as a single gaussian common to all the data points, a gaussian per data
66 : point, a single long-tailed gaussian common to all the data points, a long-tailed
67 : gaussian per data point, or using two distinct noises as in the most general formulation of Metainference.
68 : In this latter case the noise of the replica-averaging is gaussian (one per data point) and the noise for
69 : the comparison with the experimental data can be chosen using the keyword LIKELIHOOD
70 : between gaussian and log-normal (one per data point); furthermore, the evolution of the estimated average
71 : over an infinite number of replicas is driven by DFTILDE.
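 :
 : As a minimal sketch (the label gen and all values below are purely illustrative, and the
 : rdc label is assumed to be defined as in the examples below), the GENERIC variant with a
 : log-normal likelihood could be set up as
 :
 : \plumedfile
 : METAINFERENCE ...
 : ARG=rdc.*
 : NOISETYPE=GENERIC
 : LIKELIHOOD=LOGN
 : DFTILDE=0.1
 : PARAMETERS=1.9190,2.9190,3.9190,4.9190
 : SIGMA0=0.01 SIGMA_MIN=0.00001 SIGMA_MAX=3 DSIGMA=0.01
 : SIGMA_MEAN0=0.001
 : LABEL=gen
 : ... METAINFERENCE
 : \endplumedfile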
72 :
73 : As described in the Metainference theory, there are two sigma values: SIGMA_MEAN represents the
74 : error made when calculating an average quantity using a finite set of replicas and should
75 : be set as small as possible following the guidelines for replica-averaged simulations
76 : in the framework of the Maximum Entropy Principle. Alternatively, it can be obtained
77 : automatically using the internal sigma-mean optimization introduced in \cite Lohr:2017gc
78 : (OPTSIGMAMEAN=SEM); in this second case sigma_mean is estimated from the maximum standard error
79 : of the mean, either over the whole simulation or over a time window defined with the keyword AVERAGING.
80 : SIGMA_BIAS is an uncertainty parameter, sampled by a MC algorithm in the bounded interval
81 : defined by SIGMA_MIN and SIGMA_MAX. The initial value is set with SIGMA0. The MC move is a
82 : random displacement of maximum size DSIGMA. If the number of data points is
83 : too large and the acceptance rate drops, it is possible to make each MC move act on mutually
84 : exclusive, random subsets of size MC_CHUNKSIZE and to run more than one move per step by setting MC_STEPS
85 : in such a way that MC_CHUNKSIZE*MC_STEPS covers all the data points.
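 :
 : As a hedged sketch (the label msem and all values are illustrative, assuming the rdc
 : label from the examples below), sigma_mean optimization and chunked MC moves could be
 : combined as
 :
 : \plumedfile
 : METAINFERENCE ...
 : ARG=rdc.*
 : NOISETYPE=MGAUSS
 : PARAMETERS=1.9190,2.9190,3.9190,4.9190
 : OPTSIGMAMEAN=SEM AVERAGING=200
 : SIGMA0=0.01 SIGMA_MIN=0.00001 SIGMA_MAX=3 DSIGMA=0.01
 : MC_CHUNKSIZE=2 MC_STEPS=2
 : LABEL=msem
 : ... METAINFERENCE
 : \endplumedfile
 :
 : Here the four data points are all covered because MC_CHUNKSIZE*MC_STEPS=4.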
86 :
87 : Calculated and experimental data can be compared modulo a scaling factor and/or an offset
88 : using SCALEDATA and/or ADDOFFSET; the sampling is performed by a MC algorithm using either
89 : a flat or a gaussian prior, selected with SCALE_PRIOR or OFFSET_PRIOR.
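 :
 : For example, a minimal sketch sampling only an offset with the default flat prior
 : (the label moff and all values are illustrative, again assuming the rdc label from the
 : examples below):
 :
 : \plumedfile
 : METAINFERENCE ...
 : ARG=rdc.*
 : NOISETYPE=OUTLIERS
 : PARAMETERS=1.9190,2.9190,3.9190,4.9190
 : ADDOFFSET OFFSET0=0.0 OFFSET_MIN=-0.5 OFFSET_MAX=0.5 DOFFSET=0.01
 : SIGMA0=0.01 SIGMA_MIN=0.00001 SIGMA_MAX=3 DSIGMA=0.01
 : SIGMA_MEAN0=0.001
 : LABEL=moff
 : ... METAINFERENCE
 : \endplumedfile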
90 :
91 : \par Examples
92 :
93 : In the following example we calculate a set of \ref RDC, take their replica-average and
94 : compare them with a set of experimental values. The RDCs are compared with
95 : the experimental data up to a multiplicative factor SCALE that is also sampled by
96 : MC on-the-fly.
97 :
98 : \plumedfile
99 : RDC ...
100 : LABEL=rdc
101 : SCALE=0.0001
102 : GYROM=-72.5388
103 : ATOMS1=22,23
104 : ATOMS2=25,27
105 : ATOMS3=29,31
106 : ATOMS4=33,34
107 : ... RDC
108 :
109 : METAINFERENCE ...
110 : ARG=rdc.*
111 : NOISETYPE=MGAUSS
112 : PARAMETERS=1.9190,2.9190,3.9190,4.9190
113 : SCALEDATA SCALE0=1 SCALE_MIN=0.1 SCALE_MAX=3 DSCALE=0.01
114 : SIGMA0=0.01 SIGMA_MIN=0.00001 SIGMA_MAX=3 DSIGMA=0.01
115 : SIGMA_MEAN0=0.001
116 : LABEL=spe
117 : ... METAINFERENCE
118 :
119 : PRINT ARG=spe.bias FILE=BIAS STRIDE=1
120 : \endplumedfile
121 :
122 : In the following example, instead of using one uncertainty parameter per data point, we use
123 : a single uncertainty value in a long-tailed gaussian to take outliers into account; furthermore,
124 : the data are weighted for the bias applied to other variables of the system.
125 :
126 : \plumedfile
127 : RDC ...
128 : LABEL=rdc
129 : SCALE=0.0001
130 : GYROM=-72.5388
131 : ATOMS1=22,23
132 : ATOMS2=25,27
133 : ATOMS3=29,31
134 : ATOMS4=33,34
135 : ... RDC
136 :
137 : cv1: TORSION ATOMS=1,2,3,4
138 : cv2: TORSION ATOMS=2,3,4,5
139 : mm: METAD ARG=cv1,cv2 HEIGHT=0.5 SIGMA=0.3,0.3 PACE=200 BIASFACTOR=8 WALKERS_MPI
140 :
141 : METAINFERENCE ...
142 : ARG=rdc.*,mm.bias
143 : REWEIGHT
144 : NOISETYPE=OUTLIERS
145 : PARAMETERS=1.9190,2.9190,3.9190,4.9190
146 : SCALEDATA SCALE0=1 SCALE_MIN=0.1 SCALE_MAX=3 DSCALE=0.01
147 : SIGMA0=0.01 SIGMA_MIN=0.00001 SIGMA_MAX=3 DSIGMA=0.01
148 : SIGMA_MEAN0=0.001
149 : LABEL=spe
150 : ... METAINFERENCE
151 : \endplumedfile
152 :
153 : (See also \ref RDC, \ref PBMETAD).
154 :
155 : */
156 : //+ENDPLUMEDOC
157 :
158 : class Metainference : public bias::Bias
159 : {
160 : // experimental values
161 : vector<double> parameters;
162 : // noise type
163 : unsigned noise_type_;
164 : enum { GAUSS, MGAUSS, OUTLIERS, MOUTLIERS, GENERIC };
165 : unsigned gen_likelihood_;
166 : enum { LIKE_GAUSS, LIKE_LOGN };
167 : // scale is data scaling factor
168 : // noise type
169 : unsigned scale_prior_;
170 : enum { SC_GAUSS, SC_FLAT };
171 : bool doscale_;
172 : double scale_;
173 : double scale_mu_;
174 : double scale_min_;
175 : double scale_max_;
176 : double Dscale_;
177 : // scale is data scaling factor
178 : // noise type
179 : unsigned offset_prior_;
180 : bool dooffset_;
181 : double offset_;
182 : double offset_mu_;
183 : double offset_min_;
184 : double offset_max_;
185 : double Doffset_;
186 : // scale and offset regression
187 : bool doregres_zero_;
188 : int nregres_zero_;
189 : // sigma is data uncertainty
190 : vector<double> sigma_;
191 : vector<double> sigma_min_;
192 : vector<double> sigma_max_;
193 : vector<double> Dsigma_;
194 : // sigma_mean is uncertainty in the mean estimate
195 : vector<double> sigma_mean2_;
196 : // this is the estimator of the mean value per replica for generic metainference
197 : vector<double> ftilde_;
198 : double Dftilde_;
199 :
200 : // temperature in kbt
201 : double kbt_;
202 :
203 : // Monte Carlo stuff
204 : vector<Random> random;
205 : unsigned MCsteps_;
206 : long unsigned MCaccept_;
207 : long unsigned MCacceptScale_;
208 : long unsigned MCacceptFT_;
209 : long unsigned MCtrial_;
210 : unsigned MCchunksize_;
211 :
212 : // output
213 : Value* valueScale;
214 : Value* valueOffset;
215 : Value* valueAccept;
216 : Value* valueAcceptScale;
217 : Value* valueAcceptFT;
218 : vector<Value*> valueSigma;
219 : vector<Value*> valueSigmaMean;
220 : vector<Value*> valueFtilde;
221 :
222 : // restart
223 : unsigned write_stride_;
224 : OFile sfile_;
225 :
226 : // others
227 : bool firstTime;
228 : vector<bool> firstTimeW;
229 : bool master;
230 : bool do_reweight_;
231 : unsigned do_optsigmamean_;
232 : unsigned nrep_;
233 : unsigned replica_;
234 : unsigned narg;
235 :
236 : // selector
237 : string selector_;
238 :
239 : // optimize sigma mean
240 : vector< vector < vector <double> > > sigma_mean2_last_;
241 : unsigned optsigmamean_stride_;
242 :
243 : // average weights
244 : unsigned average_weights_stride_;
245 : vector< vector <double> > average_weights_;
246 :
247 : double getEnergyMIGEN(const vector<double> &mean, const vector<double> &ftilde, const vector<double> &sigma,
248 : const double scale, const double offset);
249 : double getEnergySP(const vector<double> &mean, const vector<double> &sigma,
250 : const double scale, const double offset);
251 : double getEnergySPE(const vector<double> &mean, const vector<double> &sigma,
252 : const double scale, const double offset);
253 : double getEnergyGJ(const vector<double> &mean, const vector<double> &sigma,
254 : const double scale, const double offset);
255 : double getEnergyGJE(const vector<double> &mean, const vector<double> &sigma,
256 : const double scale, const double offset);
257 : double doMonteCarlo(const vector<double> &mean);
258 : void getEnergyForceMIGEN(const vector<double> &mean, const vector<double> &dmean_x, const vector<double> &dmean_b);
259 : void getEnergyForceSP(const vector<double> &mean, const vector<double> &dmean_x, const vector<double> &dmean_b);
260 : void getEnergyForceSPE(const vector<double> &mean, const vector<double> &dmean_x, const vector<double> &dmean_b);
261 : void getEnergyForceGJ(const vector<double> &mean, const vector<double> &dmean_x, const vector<double> &dmean_b);
262 : void getEnergyForceGJE(const vector<double> &mean, const vector<double> &dmean_x, const vector<double> &dmean_b);
263 : void get_weights(const unsigned iselect, double &fact, double &var_fact);
264 : void replica_averaging(const double fact, std::vector<double> &mean, std::vector<double> &dmean_b);
265 : void get_sigma_mean(const unsigned iselect, const double fact, const double var_fact, const vector<double> &mean);
266 : void writeStatus();
267 : void do_regression_zero(const vector<double> &mean);
268 :
269 : public:
270 : explicit Metainference(const ActionOptions&);
271 : ~Metainference();
272 : void calculate() override;
273 : void update() override;
274 : static void registerKeywords(Keywords& keys);
275 : };
276 :
277 :
278 7870 : PLUMED_REGISTER_ACTION(Metainference,"METAINFERENCE")
279 :
280 20 : void Metainference::registerKeywords(Keywords& keys) {
281 20 : Bias::registerKeywords(keys);
282 40 : keys.use("ARG");
283 80 : keys.add("optional","PARARG","reference values for the experimental data, these can be provided as arguments without derivatives");
284 80 : keys.add("optional","PARAMETERS","reference values for the experimental data");
285 60 : keys.addFlag("NOENSEMBLE",false,"don't perform any replica-averaging");
286 60 : keys.addFlag("REWEIGHT",false,"simple REWEIGHT using the latest ARG as energy");
287 80 : keys.add("optional","AVERAGING", "Stride for calculation of averaged weights and sigma_mean");
288 100 : keys.add("compulsory","NOISETYPE","MGAUSS","functional form of the noise (GAUSS,MGAUSS,OUTLIERS,MOUTLIERS,GENERIC)");
289 100 : keys.add("compulsory","LIKELIHOOD","GAUSS","the likelihood for the GENERIC metainference model, GAUSS or LOGN");
290 100 : keys.add("compulsory","DFTILDE","0.1","fraction of sigma_mean used to evolve ftilde");
291 60 : keys.addFlag("SCALEDATA",false,"Set to TRUE if you want to sample a scaling factor common to all values and replicas");
292 100 : keys.add("compulsory","SCALE0","1.0","initial value of the scaling factor");
293 100 : keys.add("compulsory","SCALE_PRIOR","FLAT","either FLAT or GAUSSIAN");
294 80 : keys.add("optional","SCALE_MIN","minimum value of the scaling factor");
295 80 : keys.add("optional","SCALE_MAX","maximum value of the scaling factor");
296 80 : keys.add("optional","DSCALE","maximum MC move of the scaling factor");
297 60 : keys.addFlag("ADDOFFSET",false,"Set to TRUE if you want to sample an offset common to all values and replicas");
298 100 : keys.add("compulsory","OFFSET0","0.0","initial value of the offset");
299 100 : keys.add("compulsory","OFFSET_PRIOR","FLAT","either FLAT or GAUSSIAN");
300 80 : keys.add("optional","OFFSET_MIN","minimum value of the offset");
301 80 : keys.add("optional","OFFSET_MAX","maximum value of the offset");
302 80 : keys.add("optional","DOFFSET","maximum MC move of the offset");
303 80 : keys.add("optional","REGRES_ZERO","stride for regression with zero offset");
304 100 : keys.add("compulsory","SIGMA0","1.0","initial value of the uncertainty parameter");
305 100 : keys.add("compulsory","SIGMA_MIN","0.0","minimum value of the uncertainty parameter");
306 100 : keys.add("compulsory","SIGMA_MAX","10.","maximum value of the uncertainty parameter");
307 80 : keys.add("optional","DSIGMA","maximum MC move of the uncertainty parameter");
308 100 : keys.add("compulsory","OPTSIGMAMEAN","NONE","Set to NONE/SEM to manually set sigma mean, or to estimate it on the fly");
309 80 : keys.add("optional","SIGMA_MEAN0","starting value for the uncertainty in the mean estimate");
310 80 : keys.add("optional","TEMP","the system temperature - this is only needed if the code doesn't pass the temperature to plumed");
311 80 : keys.add("optional","MC_STEPS","number of MC steps");
312 80 : keys.add("optional","MC_CHUNKSIZE","MC chunksize");
313 80 : keys.add("optional","STATUS_FILE","write a file with all the data useful for restart/continuation of Metainference");
314 100 : keys.add("compulsory","WRITE_STRIDE","10000","write the status to a file every N steps, this can be used for restart/continuation");
315 80 : keys.add("optional","SELECTOR","name of selector");
316 80 : keys.add("optional","NSELECT","range of values for selector [0, N-1]");
317 40 : keys.use("RESTART");
318 80 : keys.addOutputComponent("sigma", "default", "uncertainty parameter");
319 80 : keys.addOutputComponent("sigmaMean", "default", "uncertainty in the mean estimate");
320 80 : keys.addOutputComponent("acceptSigma", "default", "MC acceptance for sigma values");
321 80 : keys.addOutputComponent("acceptScale", "SCALEDATA", "MC acceptance for scale value");
322 80 : keys.addOutputComponent("acceptFT", "GENERIC", "MC acceptance for general metainference f tilde value");
323 80 : keys.addOutputComponent("weight", "REWEIGHT", "weights of the weighted average");
324 80 : keys.addOutputComponent("biasDer", "REWEIGHT", "derivatives with respect to the bias");
325 80 : keys.addOutputComponent("scale", "SCALEDATA", "scale parameter");
326 80 : keys.addOutputComponent("offset", "ADDOFFSET", "offset parameter");
327 80 : keys.addOutputComponent("ftilde", "GENERIC", "ensemble average estimator");
328 20 : }
329 :
330 19 : Metainference::Metainference(const ActionOptions&ao):
331 : PLUMED_BIAS_INIT(ao),
332 : doscale_(false),
333 : scale_(1.),
334 : scale_mu_(0),
335 : scale_min_(1),
336 : scale_max_(-1),
337 : Dscale_(-1),
338 : dooffset_(false),
339 : offset_(0.),
340 : offset_mu_(0),
341 : offset_min_(1),
342 : offset_max_(-1),
343 : Doffset_(-1),
344 : doregres_zero_(false),
345 : nregres_zero_(0),
346 : Dftilde_(0.1),
347 : random(3),
348 : MCsteps_(1),
349 : MCaccept_(0),
350 : MCacceptScale_(0),
351 : MCacceptFT_(0),
352 : MCtrial_(0),
353 : MCchunksize_(0),
354 : write_stride_(0),
355 : firstTime(true),
356 : do_reweight_(false),
357 : do_optsigmamean_(0),
358 : optsigmamean_stride_(0),
359 114 : average_weights_stride_(1)
360 : {
361 19 : bool noensemble = false;
362 38 : parseFlag("NOENSEMBLE", noensemble);
363 :
364 : // set up replica stuff
365 19 : master = (comm.Get_rank()==0);
366 19 : if(master) {
367 11 : nrep_ = multi_sim_comm.Get_size();
368 11 : replica_ = multi_sim_comm.Get_rank();
369 11 : if(noensemble) nrep_ = 1;
370 : } else {
371 8 : nrep_ = 0;
372 8 : replica_ = 0;
373 : }
374 19 : comm.Sum(&nrep_,1);
375 19 : comm.Sum(&replica_,1);
376 :
377 19 : unsigned nsel = 1;
378 38 : parse("SELECTOR", selector_);
379 38 : parse("NSELECT", nsel);
380 : // do checks
381 19 : if(selector_.length()>0 && nsel<=1) error("With SELECTOR active, NSELECT must be greater than 1");
382 19 : if(selector_.length()==0 && nsel>1) error("With NSELECT greater than 1, you must specify SELECTOR");
383 :
384 : // initialise firstTimeW
385 19 : firstTimeW.resize(nsel, true);
386 :
387 : // reweight implies a different number of arguments (the latest one must always be the bias)
388 38 : parseFlag("REWEIGHT", do_reweight_);
389 19 : if(do_reweight_&&nrep_<2) error("REWEIGHT can only be used in parallel with 2 or more replicas");
390 38 : if(!getRestart()) average_weights_.resize(nsel, vector<double> (nrep_, 1./static_cast<double>(nrep_)));
391 0 : else average_weights_.resize(nsel, vector<double> (nrep_, 0.));
392 19 : narg = getNumberOfArguments();
393 19 : if(do_reweight_) narg--;
394 :
395 19 : unsigned averaging=0;
396 38 : parse("AVERAGING", averaging);
397 19 : if(averaging>0) {
398 0 : average_weights_stride_ = averaging;
399 0 : optsigmamean_stride_ = averaging;
400 : }
401 :
402 38 : parseVector("PARAMETERS",parameters);
403 19 : if(parameters.size()!=static_cast<unsigned>(narg)&&!parameters.empty())
404 0 : error("Size of PARAMETERS array should be either 0 or the same as the number of arguments in ARG");
405 :
406 : vector<Value*> arg2;
407 38 : parseArgumentList("PARARG",arg2);
408 19 : if(!arg2.empty()) {
409 4 : if(parameters.size()>0) error("It is not possible to use PARARG and PARAMETERS together");
410 4 : if(arg2.size()!=narg) error("Size of PARARG array should be the same as the number of arguments in ARG");
411 4716 : for(unsigned i=0; i<arg2.size(); i++) {
412 7068 : parameters.push_back(arg2[i]->get());
413 4712 : if(arg2[i]->hasDerivatives()==true) error("PARARG can only accept arguments without derivatives");
414 : }
415 : }
416 :
417 19 : if(parameters.size()!=narg)
418 0 : error("PARARG or PARAMETERS arrays should include the same number of elements as the arguments in ARG");
419 :
420 : string stringa_noise;
421 38 : parse("NOISETYPE",stringa_noise);
422 19 : if(stringa_noise=="GAUSS") noise_type_ = GAUSS;
423 18 : else if(stringa_noise=="MGAUSS") noise_type_ = MGAUSS;
424 10 : else if(stringa_noise=="OUTLIERS") noise_type_ = OUTLIERS;
425 5 : else if(stringa_noise=="MOUTLIERS") noise_type_ = MOUTLIERS;
426 1 : else if(stringa_noise=="GENERIC") noise_type_ = GENERIC;
427 0 : else error("Unknown noise type!");
428 :
429 19 : if(noise_type_== GENERIC) {
430 : string stringa_like;
431 2 : parse("LIKELIHOOD",stringa_like);
432 1 : if(stringa_like=="GAUSS") gen_likelihood_ = LIKE_GAUSS;
433 0 : else if(stringa_like=="LOGN") gen_likelihood_ = LIKE_LOGN;
434 0 : else error("Unknown likelihood type!");
435 :
436 2 : parse("DFTILDE",Dftilde_);
437 : }
438 :
439 38 : parse("WRITE_STRIDE",write_stride_);
440 : string status_file_name_;
441 38 : parse("STATUS_FILE",status_file_name_);
442 38 : if(status_file_name_=="") status_file_name_ = "MISTATUS"+getLabel();
443 0 : else status_file_name_ = status_file_name_+getLabel();
444 :
445 : string stringa_optsigma;
446 38 : parse("OPTSIGMAMEAN", stringa_optsigma);
447 19 : if(stringa_optsigma=="NONE") do_optsigmamean_=0;
448 0 : else if(stringa_optsigma=="SEM") do_optsigmamean_=1;
449 :
450 : // resize vector for sigma_mean history
451 19 : sigma_mean2_last_.resize(nsel);
452 38 : for(unsigned i=0; i<nsel; i++) sigma_mean2_last_[i].resize(narg);
453 :
454 : vector<double> read_sigma_mean_;
455 38 : parseVector("SIGMA_MEAN0",read_sigma_mean_);
456 38 : if(!do_optsigmamean_ && read_sigma_mean_.size()==0 && !getRestart())
457 0 : error("If you don't use OPTSIGMAMEAN and you are not RESTARTING then you MUST SET SIGMA_MEAN0");
458 :
459 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
460 13 : if(read_sigma_mean_.size()==narg) {
461 0 : sigma_mean2_.resize(narg);
462 0 : for(unsigned i=0; i<narg; i++) sigma_mean2_[i]=read_sigma_mean_[i]*read_sigma_mean_[i];
463 13 : } else if(read_sigma_mean_.size()==1) {
464 13 : sigma_mean2_.resize(narg,read_sigma_mean_[0]*read_sigma_mean_[0]);
465 0 : } else if(read_sigma_mean_.size()==0) {
466 0 : sigma_mean2_.resize(narg,0.000001);
467 : } else {
468 0 : error("SIGMA_MEAN0 can accept either one single value or as many values as the arguments (with NOISETYPE=MGAUSS|MOUTLIERS)");
469 : }
470 : // set the initial value for the history
471 7170 : for(unsigned i=0; i<nsel; i++) for(unsigned j=0; j<narg; j++) sigma_mean2_last_[i][j].push_back(sigma_mean2_[j]);
472 : } else {
473 6 : if(read_sigma_mean_.size()==1) {
474 6 : sigma_mean2_.resize(1, read_sigma_mean_[0]*read_sigma_mean_[0]);
475 0 : } else if(read_sigma_mean_.size()==0) {
476 0 : sigma_mean2_.resize(1, 0.000001);
477 : } else {
478 0 : error("If you want to use more than one SIGMA_MEAN0 you should use NOISETYPE=MGAUSS|MOUTLIERS");
479 : }
480 : // set the initial value for the history
481 50 : for(unsigned i=0; i<nsel; i++) for(unsigned j=0; j<narg; j++) sigma_mean2_last_[i][j].push_back(sigma_mean2_[0]);
482 : }
483 :
484 38 : parseFlag("SCALEDATA", doscale_);
485 19 : if(doscale_) {
486 : string stringa_noise;
487 24 : parse("SCALE_PRIOR",stringa_noise);
488 12 : if(stringa_noise=="GAUSSIAN") scale_prior_ = SC_GAUSS;
489 12 : else if(stringa_noise=="FLAT") scale_prior_ = SC_FLAT;
490 0 : else error("Unknown SCALE_PRIOR type!");
491 24 : parse("SCALE0",scale_);
492 24 : parse("DSCALE",Dscale_);
493 12 : if(Dscale_<0.) error("DSCALE must be set when using SCALEDATA");
494 12 : if(scale_prior_==SC_GAUSS) {
495 0 : scale_mu_=scale_;
496 : } else {
497 24 : parse("SCALE_MIN",scale_min_);
498 24 : parse("SCALE_MAX",scale_max_);
499 12 : if(scale_max_<scale_min_) error("SCALE_MAX and SCALE_MIN must be set when using SCALE_PRIOR=FLAT");
500 : }
501 : }
502 :
503 38 : parseFlag("ADDOFFSET", dooffset_);
504 19 : if(dooffset_) {
505 : string stringa_noise;
506 4 : parse("OFFSET_PRIOR",stringa_noise);
507 2 : if(stringa_noise=="GAUSSIAN") offset_prior_ = SC_GAUSS;
508 2 : else if(stringa_noise=="FLAT") offset_prior_ = SC_FLAT;
509 0 : else error("Unknown OFFSET_PRIOR type!");
510 4 : parse("OFFSET0",offset_);
511 4 : parse("DOFFSET",Doffset_);
512 2 : if(offset_prior_==SC_GAUSS) {
513 0 : offset_mu_=offset_;
514 0 : if(Doffset_<0.) error("DOFFSET must be set when using OFFSET_PRIOR=GAUSS");
515 : } else {
516 4 : parse("OFFSET_MIN",offset_min_);
517 4 : parse("OFFSET_MAX",offset_max_);
518 2 : if(Doffset_<0) Doffset_ = 0.05*(offset_max_ - offset_min_);
519 2 : if(offset_max_<offset_min_) error("OFFSET_MAX and OFFSET_MIN must be set when using OFFSET_PRIOR=FLAT");
520 : }
521 : }
522 :
523 : // regression with zero intercept
524 38 : parse("REGRES_ZERO", nregres_zero_);
525 19 : if(nregres_zero_>0) {
526 : // set flag
527 0 : doregres_zero_=true;
528 : // check if already sampling scale and offset
529 0 : if(doscale_) error("REGRES_ZERO and SCALEDATA are mutually exclusive");
530 0 : if(dooffset_) error("REGRES_ZERO and ADDOFFSET are mutually exclusive");
531 : }
532 :
533 : vector<double> readsigma;
534 38 : parseVector("SIGMA0",readsigma);
535 25 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma.size()>1)
536 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
537 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
538 13 : sigma_.resize(readsigma.size());
539 13 : sigma_=readsigma;
540 6 : } else sigma_.resize(1, readsigma[0]);
541 :
542 : vector<double> readsigma_min;
543 38 : parseVector("SIGMA_MIN",readsigma_min);
544 25 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma_min.size()>1)
545 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
546 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
547 13 : sigma_min_.resize(readsigma_min.size());
548 13 : sigma_min_=readsigma_min;
549 6 : } else sigma_min_.resize(1, readsigma_min[0]);
550 :
551 : vector<double> readsigma_max;
552 38 : parseVector("SIGMA_MAX",readsigma_max);
553 25 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma_max.size()>1)
554 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
555 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
556 13 : sigma_max_.resize(readsigma_max.size());
557 13 : sigma_max_=readsigma_max;
558 6 : } else sigma_max_.resize(1, readsigma_max[0]);
559 :
560 19 : if(sigma_max_.size()!=sigma_min_.size()) error("The number of values for SIGMA_MIN and SIGMA_MAX must be the same");
561 :
562 : vector<double> read_dsigma;
563 38 : parseVector("DSIGMA",read_dsigma);
564 25 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma_max.size()>1)
565 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
566 19 : if(read_dsigma.size()>0) {
567 19 : Dsigma_.resize(read_dsigma.size());
568 19 : Dsigma_=read_dsigma;
569 : } else {
570 0 : Dsigma_.resize(sigma_max_.size());
571 0 : for(unsigned i=0; i<sigma_max_.size(); i++) Dsigma_[i] = 0.05*(sigma_max_[i] - sigma_min_[i]);
572 : }
573 :
574 : // monte carlo stuff
575 38 : parse("MC_STEPS",MCsteps_);
576 38 : parse("MC_CHUNKSIZE", MCchunksize_);
577 : // get temperature
578 19 : double temp=0.0;
579 38 : parse("TEMP",temp);
580 38 : if(temp>0.0) kbt_=plumed.getAtoms().getKBoltzmann()*temp;
581 0 : else kbt_=plumed.getAtoms().getKbT();
582 19 : if(kbt_==0.0) error("Unless the MD engine passes the temperature to plumed, you must specify it using TEMP");
583 :
584 19 : checkRead();
585 :
586 : // set sigma_bias
587 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
588 13 : if(sigma_.size()==1) {
589 13 : double tmp = sigma_[0];
590 13 : sigma_.resize(narg, tmp);
591 0 : } else if(sigma_.size()>1&&sigma_.size()!=narg) {
592 0 : error("SIGMA0 can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
593 : }
594 13 : if(sigma_min_.size()==1) {
595 13 : double tmp = sigma_min_[0];
596 13 : sigma_min_.resize(narg, tmp);
597 0 : } else if(sigma_min_.size()>1&&sigma_min_.size()!=narg) {
598 0 : error("SIGMA_MIN can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
599 : }
600 13 : if(sigma_max_.size()==1) {
601 13 : double tmp = sigma_max_[0];
602 13 : sigma_max_.resize(narg, tmp);
603 0 : } else if(sigma_max_.size()>1&&sigma_max_.size()!=narg) {
604 0 : error("SIGMA_MAX can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
605 : }
606 13 : if(Dsigma_.size()==1) {
607 13 : double tmp = Dsigma_[0];
608 13 : Dsigma_.resize(narg, tmp);
609 0 : } else if(Dsigma_.size()>1&&Dsigma_.size()!=narg) {
610 0 : error("DSIGMA can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
611 : }
612 : }
613 :
614 38 : IFile restart_sfile;
615 19 : restart_sfile.link(*this);
616 19 : if(getRestart()&&restart_sfile.FileExist(status_file_name_)) {
617 0 : firstTime = false;
618 0 : for(unsigned i=0; i<nsel; i++) firstTimeW[i] = false;
619 0 : restart_sfile.open(status_file_name_);
620 0 : log.printf(" Restarting from %s\n", status_file_name_.c_str());
621 : double dummy;
622 0 : if(restart_sfile.scanField("time",dummy)) {
623 : // nsel
624 0 : for(unsigned i=0; i<sigma_mean2_last_.size(); i++) {
625 : std::string msg_i;
626 0 : Tools::convert(i,msg_i);
627 : // narg
628 0 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
629 0 : for(unsigned j=0; j<narg; ++j) {
630 : std::string msg_j;
631 0 : Tools::convert(j,msg_j);
632 0 : std::string msg = msg_i+"_"+msg_j;
633 : double read_sm;
634 0 : restart_sfile.scanField("sigmaMean_"+msg,read_sm);
635 0 : sigma_mean2_last_[i][j][0] = read_sm*read_sm;
636 : }
637 : }
638 0 : if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
639 : double read_sm;
640 : std::string msg_j;
641 0 : Tools::convert(0,msg_j);
642 0 : std::string msg = msg_i+"_"+msg_j;
643 0 : restart_sfile.scanField("sigmaMean_"+msg,read_sm);
644 0 : for(unsigned j=0; j<narg; j++) sigma_mean2_last_[i][j][0] = read_sm*read_sm;
645 : }
646 : }
647 :
648 0 : for(unsigned i=0; i<sigma_.size(); ++i) {
649 : std::string msg;
650 0 : Tools::convert(i,msg);
651 0 : restart_sfile.scanField("sigma_"+msg,sigma_[i]);
652 : }
653 0 : if(noise_type_==GENERIC) {
654 0 : for(unsigned i=0; i<ftilde_.size(); ++i) {
655 : std::string msg;
656 0 : Tools::convert(i,msg);
657 0 : restart_sfile.scanField("ftilde_"+msg,ftilde_[i]);
658 : }
659 : }
660 0 : restart_sfile.scanField("scale0_",scale_);
661 0 : restart_sfile.scanField("offset0_",offset_);
662 :
663 0 : for(unsigned i=0; i<nsel; i++) {
664 : std::string msg;
665 0 : Tools::convert(i,msg);
666 : double tmp_w;
667 0 : restart_sfile.scanField("weight_"+msg,tmp_w);
668 0 : if(master) {
669 0 : average_weights_[i][replica_] = tmp_w;
670 0 : if(nrep_>1) multi_sim_comm.Sum(&average_weights_[i][0], nrep_);
671 : }
672 0 : comm.Sum(&average_weights_[i][0], nrep_);
673 : }
674 :
675 : }
676 0 : restart_sfile.scanField();
677 0 : restart_sfile.close();
678 : }
679 :
680 19 : switch(noise_type_) {
681 : case GENERIC:
682 1 : log.printf(" with general metainference ");
683 1 : if(gen_likelihood_==LIKE_GAUSS) log.printf(" and a gaussian likelihood\n");
684 0 : else if(gen_likelihood_==LIKE_LOGN) log.printf(" and a log-normal likelihood\n");
685 1 : log.printf(" ensemble average parameter sampled with a step %lf of sigma_mean\n", Dftilde_);
686 : break;
687 : case GAUSS:
688 1 : log.printf(" with gaussian noise and a single noise parameter for all the data\n");
689 : break;
690 : case MGAUSS:
691 8 : log.printf(" with gaussian noise and a noise parameter for each data point\n");
692 : break;
693 : case OUTLIERS:
694 5 : log.printf(" with long tailed gaussian noise and a single noise parameter for all the data\n");
695 : break;
696 : case MOUTLIERS:
697 4 : log.printf(" with long tailed gaussian noise and a noise parameter for each data point\n");
698 : break;
699 : }
700 :
701 19 : if(doscale_) {
702 12 : log.printf(" sampling a common scaling factor with:\n");
703 12 : log.printf(" initial scale parameter %f\n",scale_);
704 12 : if(scale_prior_==SC_GAUSS) {
705 0 : log.printf(" gaussian prior with mean %f and width %f\n",scale_mu_,Dscale_);
706 : }
707 12 : if(scale_prior_==SC_FLAT) {
708 12 : log.printf(" flat prior between %f - %f\n",scale_min_,scale_max_);
709 12 : log.printf(" maximum MC move of scale parameter %f\n",Dscale_);
710 : }
711 : }
712 :
713 19 : if(dooffset_) {
714 2 : log.printf(" sampling a common offset with:\n");
715 2 : log.printf(" initial offset parameter %f\n",offset_);
716 2 : if(offset_prior_==SC_GAUSS) {
717 0 : log.printf(" gaussian prior with mean %f and width %f\n",offset_mu_,Doffset_);
718 : }
719 2 : if(offset_prior_==SC_FLAT) {
720 2 : log.printf(" flat prior between %f - %f\n",offset_min_,offset_max_);
721 2 : log.printf(" maximum MC move of offset parameter %f\n",Doffset_);
722 : }
723 : }
724 :
725 19 : if(doregres_zero_)
726 0 : log.printf(" doing regression with zero intercept with stride: %d\n", nregres_zero_);
727 :
728 19 : log.printf(" number of experimental data points %u\n",narg);
729 19 : log.printf(" number of replicas %u\n",nrep_);
730 19 : log.printf(" initial data uncertainties");
731 4811 : for(unsigned i=0; i<sigma_.size(); ++i) log.printf(" %f", sigma_[i]);
732 19 : log.printf("\n");
733 19 : log.printf(" minimum data uncertainties");
734 4811 : for(unsigned i=0; i<sigma_.size(); ++i) log.printf(" %f",sigma_min_[i]);
735 19 : log.printf("\n");
736 19 : log.printf(" maximum data uncertainties");
737 4811 : for(unsigned i=0; i<sigma_.size(); ++i) log.printf(" %f",sigma_max_[i]);
738 19 : log.printf("\n");
739 19 : log.printf(" maximum MC move of data uncertainties");
740 4811 : for(unsigned i=0; i<sigma_.size(); ++i) log.printf(" %f",Dsigma_[i]);
741 19 : log.printf("\n");
742 19 : log.printf(" temperature of the system %f\n",kbt_);
743 19 : log.printf(" MC steps %u\n",MCsteps_);
744 19 : log.printf(" initial standard errors of the mean");
745 4811 : for(unsigned i=0; i<sigma_mean2_.size(); ++i) log.printf(" %f", sqrt(sigma_mean2_[i]));
746 19 : log.printf("\n");
747 :
748 19 : if(do_reweight_) {
749 32 : addComponent("biasDer");
750 32 : componentIsNotPeriodic("biasDer");
751 32 : addComponent("weight");
752 32 : componentIsNotPeriodic("weight");
753 : }
754 :
755 19 : if(doscale_ || doregres_zero_) {
756 24 : addComponent("scale");
757 24 : componentIsNotPeriodic("scale");
758 24 : valueScale=getPntrToComponent("scale");
759 : }
760 :
761 19 : if(dooffset_) {
762 4 : addComponent("offset");
763 4 : componentIsNotPeriodic("offset");
764 4 : valueOffset=getPntrToComponent("offset");
765 : }
766 :
767 19 : if(dooffset_||doscale_) {
768 28 : addComponent("acceptScale");
769 28 : componentIsNotPeriodic("acceptScale");
770 28 : valueAcceptScale=getPntrToComponent("acceptScale");
771 : }
772 :
773 19 : if(noise_type_==GENERIC) {
774 2 : addComponent("acceptFT");
775 2 : componentIsNotPeriodic("acceptFT");
776 2 : valueAcceptFT=getPntrToComponent("acceptFT");
777 : }
778 :
779 38 : addComponent("acceptSigma");
780 38 : componentIsNotPeriodic("acceptSigma");
781 38 : valueAccept=getPntrToComponent("acceptSigma");
782 :
783 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
784 4793 : for(unsigned i=0; i<sigma_mean2_.size(); ++i) {
785 2390 : std::string num; Tools::convert(i,num);
786 7170 : addComponent("sigmaMean-"+num); componentIsNotPeriodic("sigmaMean-"+num);
787 7170 : valueSigmaMean.push_back(getPntrToComponent("sigmaMean-"+num));
788 4780 : getPntrToComponent("sigmaMean-"+num)->set(sqrt(sigma_mean2_[i]));
789 7170 : addComponent("sigma-"+num); componentIsNotPeriodic("sigma-"+num);
790 7170 : valueSigma.push_back(getPntrToComponent("sigma-"+num));
791 4780 : getPntrToComponent("sigma-"+num)->set(sigma_[i]);
792 2390 : if(noise_type_==GENERIC) {
793 6 : addComponent("ftilde-"+num); componentIsNotPeriodic("ftilde-"+num);
794 6 : valueFtilde.push_back(getPntrToComponent("ftilde-"+num));
795 : }
796 : }
797 : } else {
798 18 : addComponent("sigmaMean"); componentIsNotPeriodic("sigmaMean");
799 18 : valueSigmaMean.push_back(getPntrToComponent("sigmaMean"));
800 12 : getPntrToComponent("sigmaMean")->set(sqrt(sigma_mean2_[0]));
801 18 : addComponent("sigma"); componentIsNotPeriodic("sigma");
802 18 : valueSigma.push_back(getPntrToComponent("sigma"));
803 12 : getPntrToComponent("sigma")->set(sigma_[0]);
804 : }
805 :
806 : // initialize random seed
807 : unsigned iseed;
808 19 : if(master) iseed = time(NULL)+replica_;
809 8 : else iseed = 0;
810 19 : comm.Sum(&iseed, 1);
811 38 : random[0].setSeed(-iseed);
812 : // Random chunk
813 19 : if(master) iseed = time(NULL)+replica_;
814 8 : else iseed = 0;
815 19 : comm.Sum(&iseed, 1);
816 38 : random[2].setSeed(-iseed);
817 19 : if(doscale_||dooffset_) {
818 : // in this case we want the same seed everywhere
819 14 : iseed = time(NULL);
820 14 : if(master&&nrep_>1) multi_sim_comm.Bcast(iseed,0);
821 14 : comm.Bcast(iseed,0);
822 28 : random[1].setSeed(-iseed);
823 : }
824 :
825 : // outfile stuff
826 19 : if(write_stride_>0) {
827 19 : sfile_.link(*this);
828 19 : sfile_.open(status_file_name_);
829 : }
830 :
831 57 : log<<"  Bibliography "<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e1501177 (2016)");
832 51 : if(do_reweight_) log<<plumed.cite("Bonomi, Camilloni, Vendruscolo, Sci. Rep. 6, 31232 (2016)");
833 19 : if(do_optsigmamean_>0) log<<plumed.cite("Loehr, Jussupow, Camilloni, J. Chem. Phys. 146, 165102 (2017)");
834 57 : log<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
835 19 : log<<"\n";
836 19 : }
837 :
838 114 : Metainference::~Metainference()
839 : {
840 19 : if(sfile_.isOpen()) sfile_.close();
841 38 : }
842 :
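 : // Metainference energy for NOISETYPE=OUTLIERS: long-tailed gaussian noise with a single
 : // uncertainty parameter common to all data points (dispatched from doMonteCarlo)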
843 156 : double Metainference::getEnergySP(const vector<double> &mean, const vector<double> &sigma,
844 : const double scale, const double offset)
845 : {
846 156 : const double scale2 = scale*scale;
847 156 : const double sm2 = sigma_mean2_[0];
848 156 : const double ss2 = sigma[0]*sigma[0] + scale2*sm2;
849 156 : const double sss = sigma[0]*sigma[0] + sm2;
850 :
851 : double ene = 0.0;
852 468 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
853 : {
854 312 : #pragma omp for reduction( + : ene)
855 : for(unsigned i=0; i<narg; ++i) {
856 1971 : const double dev = scale*mean[i]-parameters[i]+offset;
857 657 : const double a2 = 0.5*dev*dev + ss2;
858 657 : ene += std::log(2.0*a2/(1.0-exp(-a2/sm2)));
859 : }
860 : }
861 : // add one single Jeffrey's prior and one normalisation per data point
862 156 : ene += 0.5*std::log(sss) + static_cast<double>(narg)*0.5*std::log(0.5*M_PI*M_PI/ss2);
863 156 : if(doscale_ || doregres_zero_) ene += 0.5*std::log(sss);
864 156 : if(dooffset_) ene += 0.5*std::log(sss);
865 156 : return kbt_ * ene;
866 : }
867 :
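 : // Metainference energy for NOISETYPE=MOUTLIERS: long-tailed gaussian noise with one
 : // uncertainty parameter per data point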
868 144 : double Metainference::getEnergySPE(const vector<double> &mean, const vector<double> &sigma,
869 : const double scale, const double offset)
870 : {
871 144 : const double scale2 = scale*scale;
872 : double ene = 0.0;
873 432 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
874 : {
875 288 : #pragma omp for reduction( + : ene)
876 : for(unsigned i=0; i<narg; ++i) {
877 1152 : const double sm2 = sigma_mean2_[i];
878 576 : const double ss2 = sigma[i]*sigma[i] + scale2*sm2;
879 576 : const double sss = sigma[i]*sigma[i] + sm2;
880 1152 : const double dev = scale*mean[i]-parameters[i]+offset;
881 576 : const double a2 = 0.5*dev*dev + ss2;
882 576 : ene += 0.5*std::log(sss) + 0.5*std::log(0.5*M_PI*M_PI/ss2) + std::log(2.0*a2/(1.0-exp(-a2/sm2)));
883 1152 : if(doscale_ || doregres_zero_) ene += 0.5*std::log(sss);
884 576 : if(dooffset_) ene += 0.5*std::log(sss);
885 : }
886 : }
887 144 : return kbt_ * ene;
888 : }
889 :
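 : // Metainference energy for NOISETYPE=GENERIC: gaussian noise on the replica average plus
 : // a gaussian or log-normal likelihood (keyword LIKELIHOOD) for the comparison with experiment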
890 48 : double Metainference::getEnergyMIGEN(const vector<double> &mean, const vector<double> &ftilde, const vector<double> &sigma,
891 : const double scale, const double offset)
892 : {
893 : double ene = 0.0;
894 144 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
895 : {
896 96 : #pragma omp for reduction( + : ene)
897 : for(unsigned i=0; i<narg; ++i) {
898 190 : const double inv_sb2 = 1./(sigma[i]*sigma[i]);
899 95 : const double inv_sm2 = 1./sigma_mean2_[i];
900 : double devb = 0;
901 285 : if(gen_likelihood_==LIKE_GAUSS) devb = scale*ftilde[i]-parameters[i]+offset;
902 0 : else if(gen_likelihood_==LIKE_LOGN) devb = std::log(scale*ftilde[i]/parameters[i]);
903 190 : double devm = mean[i] - ftilde[i];
904 : // deviation + normalisation + jeffrey
905 : double normb = 0.;
906 190 : if(gen_likelihood_==LIKE_GAUSS) normb = -0.5*std::log(0.5/M_PI*inv_sb2);
907 0 : else if(gen_likelihood_==LIKE_LOGN) normb = -0.5*std::log(0.5/M_PI*inv_sb2/(parameters[i]*parameters[i]));
908 95 : const double normm = -0.5*std::log(0.5/M_PI*inv_sm2);
909 95 : const double jeffreys = -0.5*std::log(2.*inv_sb2);
910 95 : ene += 0.5*devb*devb*inv_sb2 + 0.5*devm*devm*inv_sm2 + normb + normm + jeffreys;
911 95 : if(doscale_ || doregres_zero_) ene += jeffreys;
912 95 : if(dooffset_) ene += jeffreys;
913 : }
914 : }
915 48 : return kbt_ * ene;
916 : }
917 :
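 : // Metainference energy for NOISETYPE=GAUSS: gaussian noise with a single uncertainty
 : // parameter common to all data points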
918 36 : double Metainference::getEnergyGJ(const vector<double> &mean, const vector<double> &sigma,
919 : const double scale, const double offset)
920 : {
921 36 : const double scale2 = scale*scale;
922 72 : const double inv_s2 = 1./(sigma[0]*sigma[0] + scale2*sigma_mean2_[0]);
923 36 : const double inv_sss = 1./(sigma[0]*sigma[0] + sigma_mean2_[0]);
924 :
925 : double ene = 0.0;
926 108 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
927 : {
928 72 : #pragma omp for reduction( + : ene)
929 : for(unsigned i=0; i<narg; ++i) {
930 210 : double dev = scale*mean[i]-parameters[i]+offset;
931 70 : ene += 0.5*dev*dev*inv_s2;
932 : }
933 : }
934 36 : const double normalisation = -0.5*std::log(0.5/M_PI*inv_s2);
935 36 : const double jeffreys = -0.5*std::log(2.*inv_sss);
936 : // add Jeffrey's prior in case one sigma for all data points + one normalisation per datapoint
937 36 : ene += jeffreys + static_cast<double>(narg)*normalisation;
938 36 : if(doscale_ || doregres_zero_) ene += jeffreys;
939 36 : if(dooffset_) ene += jeffreys;
940 :
941 36 : return kbt_ * ene;
942 : }
943 :
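 : // Metainference energy for NOISETYPE=MGAUSS: gaussian noise with one uncertainty
 : // parameter per data point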
944 152 : double Metainference::getEnergyGJE(const vector<double> &mean, const vector<double> &sigma,
945 : const double scale, const double offset)
946 : {
947 152 : const double scale2 = scale*scale;
948 :
949 : double ene = 0.0;
950 456 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
951 : {
952 304 : #pragma omp for reduction( + : ene)
953 : for(unsigned i=0; i<narg; ++i) {
954 15849 : const double inv_s2 = 1./(sigma[i]*sigma[i] + scale2*sigma_mean2_[i]);
955 5283 : const double inv_sss = 1./(sigma[i]*sigma[i] + sigma_mean2_[i]);
956 10566 : double dev = scale*mean[i]-parameters[i]+offset;
957 : // deviation + normalisation + jeffrey
958 5283 : const double normalisation = -0.5*std::log(0.5/M_PI*inv_s2);
959 5283 : const double jeffreys = -0.5*std::log(2.*inv_sss);
960 5283 : ene += 0.5*dev*dev*inv_s2 + normalisation + jeffreys;
961 5856 : if(doscale_ || doregres_zero_) ene += jeffreys;
962 5283 : if(dooffset_) ene += jeffreys;
963 : }
964 : }
965 152 : return kbt_ * ene;
966 : }
967 :
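 : // One MC sweep: for GENERIC propose new ftilde values, then (if sampled) a new scale
 : // and/or offset, then new sigmas; each proposal is accepted with a Metropolis criterion
 : // and the replica-summed energy of the final state is returned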
968 178 : double Metainference::doMonteCarlo(const vector<double> &mean_)
969 : {
970 : // calculate old energy with the updated coordinates
971 178 : double old_energy=0.;
972 :
973 178 : switch(noise_type_) {
974 : case GAUSS:
975 12 : old_energy = getEnergyGJ(mean_,sigma_,scale_,offset_);
976 12 : break;
977 : case MGAUSS:
978 52 : old_energy = getEnergyGJE(mean_,sigma_,scale_,offset_);
979 52 : break;
980 : case OUTLIERS:
981 54 : old_energy = getEnergySP(mean_,sigma_,scale_,offset_);
982 54 : break;
983 : case MOUTLIERS:
984 48 : old_energy = getEnergySPE(mean_,sigma_,scale_,offset_);
985 48 : break;
986 : case GENERIC:
987 12 : old_energy = getEnergyMIGEN(mean_,ftilde_,sigma_,scale_,offset_);
988 12 : break;
989 : }
990 :
991 178 : if(!getExchangeStep()) {
992 : // Create vector of random sigma indices
993 : vector<unsigned> indices;
994 178 : if (MCchunksize_ > 0) {
995 0 : for (unsigned j=0; j<sigma_.size(); j++) {
996 0 : indices.push_back(j);
997 : }
998 0 : random[2].Shuffle(indices);
999 : }
1000 : bool breaknow = false;
1001 :
1002 : // cycle on MC steps
1003 534 : for(unsigned i=0; i<MCsteps_; ++i) {
1004 :
1005 178 : MCtrial_++;
1006 :
1007 : // propose move for ftilde
1008 178 : vector<double> new_ftilde(sigma_.size());
1009 178 : new_ftilde = ftilde_;
1010 :
1011 178 : if(noise_type_==GENERIC) {
1012 : // change all sigmas
1013 60 : for(unsigned j=0; j<sigma_.size(); j++) {
1014 24 : const double r3 = random[0].Gaussian();
1015 48 : const double ds3 = Dftilde_*sqrt(sigma_mean2_[j])*r3;
1016 24 : new_ftilde[j] = ftilde_[j] + ds3;
1017 : }
1018 : // calculate new energy
1019 12 : double new_energy = getEnergyMIGEN(mean_,new_ftilde,sigma_,scale_,offset_);
1020 :
1021 : // accept or reject
1022 12 : const double delta = ( new_energy - old_energy ) / kbt_;
1023 : // if delta is negative always accept move
1024 12 : if( delta <= 0.0 ) {
1025 12 : old_energy = new_energy;
1026 12 : ftilde_ = new_ftilde;
1027 12 : MCacceptFT_++;
1028 : // otherwise extract random number
1029 : } else {
1030 0 : const double s = random[0].RandU01();
1031 0 : if( s < exp(-delta) ) {
1032 0 : old_energy = new_energy;
1033 0 : ftilde_ = new_ftilde;
1034 0 : MCacceptFT_++;
1035 : }
1036 : }
1037 : }
1038 :
1039 : // propose move for scale and/or offset
1040 178 : double new_scale = scale_;
1041 178 : double new_offset = offset_;
1042 178 : if(doscale_||dooffset_) {
1043 168 : if(doscale_) {
1044 144 : if(scale_prior_==SC_FLAT) {
1045 144 : const double r1 = random[1].Gaussian();
1046 144 : const double ds1 = Dscale_*r1;
1047 144 : new_scale += ds1;
1048 : // check boundaries
1049 144 : if(new_scale > scale_max_) {new_scale = 2.0 * scale_max_ - new_scale;}
1050 144 : if(new_scale < scale_min_) {new_scale = 2.0 * scale_min_ - new_scale;}
1051 : } else {
1052 0 : const double r1 = random[1].Gaussian();
1053 0 : const double ds1 = 0.5*(scale_mu_-new_scale)+Dscale_*exp(1)/M_PI*r1;
1054 0 : new_scale += ds1;
1055 : }
1056 : }
1057 :
1058 168 : if(dooffset_) {
1059 24 : if(offset_prior_==SC_FLAT) {
1060 24 : const double r1 = random[1].Gaussian();
1061 24 : const double ds1 = Doffset_*r1;
1062 24 : new_offset += ds1;
1063 : // check boundaries
1064 24 : if(new_offset > offset_max_) {new_offset = 2.0 * offset_max_ - new_offset;}
1065 24 : if(new_offset < offset_min_) {new_offset = 2.0 * offset_min_ - new_offset;}
1066 : } else {
1067 0 : const double r1 = random[1].Gaussian();
1068 0 : const double ds1 = 0.5*(offset_mu_-new_offset)+Doffset_*exp(1)/M_PI*r1;
1069 0 : new_offset += ds1;
1070 : }
1071 : }
1072 :
1073 : // calculate new energy
1074 : double new_energy = 0.;
1075 :
1076 168 : switch(noise_type_) {
1077 : case GAUSS:
1078 12 : new_energy = getEnergyGJ(mean_,sigma_,new_scale,new_offset);
1079 : break;
1080 : case MGAUSS:
1081 48 : new_energy = getEnergyGJE(mean_,sigma_,new_scale,new_offset);
1082 : break;
1083 : case OUTLIERS:
1084 48 : new_energy = getEnergySP(mean_,sigma_,new_scale,new_offset);
1085 : break;
1086 : case MOUTLIERS:
1087 48 : new_energy = getEnergySPE(mean_,sigma_,new_scale,new_offset);
1088 : break;
1089 : case GENERIC:
1090 12 : new_energy = getEnergyMIGEN(mean_,ftilde_,sigma_,new_scale,new_offset);
1091 : break;
1092 : }
1093 : // for the scale we need to consider the total energy
1094 168 : vector<double> totenergies(2);
1095 168 : if(master) {
1096 96 : totenergies[0] = old_energy;
1097 96 : totenergies[1] = new_energy;
1098 96 : if(nrep_>1) multi_sim_comm.Sum(totenergies);
1099 : } else {
1100 72 : totenergies[0] = 0;
1101 72 : totenergies[1] = 0;
1102 : }
1103 168 : comm.Sum(totenergies);
1104 :
1105 : // accept or reject
1106 168 : const double delta = ( totenergies[1] - totenergies[0] ) / kbt_;
1107 : // if delta is negative always accept move
1108 168 : if( delta <= 0.0 ) {
1109 168 : old_energy = new_energy;
1110 168 : scale_ = new_scale;
1111 168 : offset_ = new_offset;
1112 168 : MCacceptScale_++;
1113 : // otherwise extract random number
1114 : } else {
1115 0 : double s = random[1].RandU01();
1116 0 : if( s < exp(-delta) ) {
1117 0 : old_energy = new_energy;
1118 0 : scale_ = new_scale;
1119 0 : offset_ = new_offset;
1120 0 : MCacceptScale_++;
1121 : }
1122 : }
1123 : }
1124 :
1125 : // propose move for sigma
1126 178 : vector<double> new_sigma(sigma_.size());
1127 178 : new_sigma = sigma_;
1128 :
1129 : // change MCchunksize_ sigmas
1130 178 : if (MCchunksize_ > 0) {
1131 0 : if ((MCchunksize_ * i) >= sigma_.size()) {
1132 : // This means we are not moving any sigma, so we should break immediately
1133 : breaknow = true;
1134 : }
1135 :
1136 : // change random sigmas
1137 0 : for(unsigned j=0; j<MCchunksize_; j++) {
1138 0 : const unsigned shuffle_index = j + MCchunksize_ * i;
1139 0 : if (shuffle_index >= sigma_.size()) {
1140 : // Going any further will segfault but we should still evaluate the sigmas we changed
1141 : break;
1142 : }
1143 0 : const unsigned index = indices[shuffle_index];
1144 0 : const double r2 = random[0].Gaussian();
1145 0 : const double ds2 = Dsigma_[index]*r2;
1146 0 : new_sigma[index] = sigma_[index] + ds2;
1147 : // check boundaries
1148 0 : if(new_sigma[index] > sigma_max_[index]) {new_sigma[index] = 2.0 * sigma_max_[index] - new_sigma[index];}
1149 0 : if(new_sigma[index] < sigma_min_[index]) {new_sigma[index] = 2.0 * sigma_min_[index] - new_sigma[index];}
1150 : }
1151 : } else {
1152 : // change all sigmas
1153 5838 : for(unsigned j=0; j<sigma_.size(); j++) {
1154 2830 : const double r2 = random[0].Gaussian();
1155 2830 : const double ds2 = Dsigma_[j]*r2;
1156 2830 : new_sigma[j] = sigma_[j] + ds2;
1157 : // check boundaries
1158 5660 : if(new_sigma[j] > sigma_max_[j]) {new_sigma[j] = 2.0 * sigma_max_[j] - new_sigma[j];}
1159 5660 : if(new_sigma[j] < sigma_min_[j]) {new_sigma[j] = 2.0 * sigma_min_[j] - new_sigma[j];}
1160 : }
1161 : }
1162 :
1163 178 : if (breaknow) {
1164 : // We didnt move any sigmas, so no sense in evaluating anything
1165 : break;
1166 : }
1167 :
1168 : // calculate new energy
1169 : double new_energy = 0.;
1170 178 : switch(noise_type_) {
1171 : case GAUSS:
1172 12 : new_energy = getEnergyGJ(mean_,new_sigma,scale_,offset_);
1173 : break;
1174 : case MGAUSS:
1175 52 : new_energy = getEnergyGJE(mean_,new_sigma,scale_,offset_);
1176 : break;
1177 : case OUTLIERS:
1178 54 : new_energy = getEnergySP(mean_,new_sigma,scale_,offset_);
1179 : break;
1180 : case MOUTLIERS:
1181 48 : new_energy = getEnergySPE(mean_,new_sigma,scale_,offset_);
1182 : break;
1183 : case GENERIC:
1184 12 : new_energy = getEnergyMIGEN(mean_,ftilde_,new_sigma,scale_,offset_);
1185 : break;
1186 : }
1187 :
1188 : // accept or reject
1189 178 : const double delta = ( new_energy - old_energy ) / kbt_;
1190 : // if delta is negative always accept move
1191 178 : if( delta <= 0.0 ) {
1192 178 : old_energy = new_energy;
1193 178 : sigma_ = new_sigma;
1194 178 : MCaccept_++;
1195 : // otherwise extract random number
1196 : } else {
1197 0 : const double s = random[0].RandU01();
1198 0 : if( s < exp(-delta) ) {
1199 0 : old_energy = new_energy;
1200 0 : sigma_ = new_sigma;
1201 0 : MCaccept_++;
1202 : }
1203 : }
1204 :
1205 : }
1206 :
1207 : /* save the result of the sampling */
1208 178 : double accept = static_cast<double>(MCaccept_) / static_cast<double>(MCtrial_);
1209 178 : valueAccept->set(accept);
1210 178 : if(doscale_ || doregres_zero_) valueScale->set(scale_);
1211 178 : if(dooffset_) valueOffset->set(offset_);
1212 178 : if(doscale_||dooffset_) {
1213 168 : accept = static_cast<double>(MCacceptScale_) / static_cast<double>(MCtrial_);
1214 168 : valueAcceptScale->set(accept);
1215 : }
1216 11498 : for(unsigned i=0; i<sigma_.size(); i++) valueSigma[i]->set(sigma_[i]);
1217 178 : if(noise_type_==GENERIC) {
1218 12 : accept = static_cast<double>(MCacceptFT_) / static_cast<double>(MCtrial_);
1219 12 : valueAcceptFT->set(accept);
1220 108 : for(unsigned i=0; i<sigma_.size(); i++) valueFtilde[i]->set(ftilde_[i]);
1221 : }
1222 : }
1223 :
1224 : // here we sum the score over the replicas to get the full metainference score that we save as a bias
1225 178 : if(master) {
1226 104 : if(nrep_>1) multi_sim_comm.Sum(old_energy);
1227 : } else {
1228 74 : old_energy=0;
1229 : }
1230 178 : comm.Sum(old_energy);
1231 :
1232 178 : return old_energy;
1233 : }
1234 :
1235 : /*
1236 : In the following energy-force functions we don't add the normalisation and the Jeffreys priors
1237 : because they are not needed for the forces; the correct Metainference energy is the one calculated
1238 : in the Monte Carlo step.
1239 : */
1240 :
1241 54 : void Metainference::getEnergyForceSP(const vector<double> &mean, const vector<double> &dmean_x,
1242 : const vector<double> &dmean_b)
1243 : {
1244 54 : const double scale2 = scale_*scale_;
1245 54 : const double sm2 = sigma_mean2_[0];
1246 54 : const double ss2 = sigma_[0]*sigma_[0] + scale2*sm2;
1247 54 : vector<double> f(narg,0);
1248 :
1249 54 : if(master) {
1250 87 : #pragma omp parallel num_threads(OpenMP::getNumThreads())
1251 : {
1252 57 : #pragma omp for
1253 : for(unsigned i=0; i<narg; ++i) {
1254 408 : const double dev = scale_*mean[i]-parameters[i]+offset_;
1255 136 : const double a2 = 0.5*dev*dev + ss2;
1256 136 : const double t = exp(-a2/sm2);
1257 136 : const double dt = 1./t;
1258 136 : const double dit = 1./(1.-dt);
1259 272 : f[i] = -scale_*dev*(dit/sm2 + 1./a2);
1260 : }
1261 : }
1262 : // collect contribution to forces and energy from other replicas
1263 54 : if(nrep_>1) multi_sim_comm.Sum(&f[0],narg);
1264 : }
1265 : // intra-replica summation
1266 108 : comm.Sum(&f[0],narg);
1267 :
1268 : double w_tmp = 0.;
1269 234 : for(unsigned i=0; i<narg; ++i) {
1270 702 : setOutputForce(i, kbt_*f[i]*dmean_x[i]);
1271 702 : w_tmp += kbt_*f[i]*dmean_b[i];
1272 : }
1273 :
1274 54 : if(do_reweight_) {
1275 48 : setOutputForce(narg, w_tmp);
1276 96 : getPntrToComponent("biasDer")->set(-w_tmp);
1277 : }
1278 54 : }
1279 :
1280 48 : void Metainference::getEnergyForceSPE(const vector<double> &mean, const vector<double> &dmean_x,
1281 : const vector<double> &dmean_b)
1282 : {
1283 48 : const double scale2 = scale_*scale_;
1284 48 : vector<double> f(narg,0);
1285 :
1286 48 : if(master) {
1287 71 : #pragma omp parallel num_threads(OpenMP::getNumThreads())
1288 : {
1289 47 : #pragma omp for
1290 : for(unsigned i=0; i<narg; ++i) {
1291 192 : const double sm2 = sigma_mean2_[i];
1292 96 : const double ss2 = sigma_[i]*sigma_[i] + scale2*sm2;
1293 288 : const double dev = scale_*mean[i]-parameters[i]+offset_;
1294 96 : const double a2 = 0.5*dev*dev + ss2;
1295 96 : const double t = exp(-a2/sm2);
1296 96 : const double dt = 1./t;
1297 96 : const double dit = 1./(1.-dt);
1298 192 : f[i] = -scale_*dev*(dit/sm2 + 1./a2);
1299 : }
1300 : }
1301 : // collect contribution to forces and energy from other replicas
1302 48 : if(nrep_>1) multi_sim_comm.Sum(&f[0],narg);
1303 : }
1304 96 : comm.Sum(&f[0],narg);
1305 :
1306 : double w_tmp = 0.;
1307 192 : for(unsigned i=0; i<narg; ++i) {
1308 576 : setOutputForce(i, kbt_ * dmean_x[i] * f[i]);
1309 576 : w_tmp += kbt_ * dmean_b[i] *f[i];
1310 : }
1311 :
1312 48 : if(do_reweight_) {
1313 48 : setOutputForce(narg, w_tmp);
1314 96 : getPntrToComponent("biasDer")->set(-w_tmp);
1315 : }
1316 48 : }
1317 :
1318 12 : void Metainference::getEnergyForceGJ(const vector<double> &mean, const vector<double> &dmean_x,
1319 : const vector<double> &dmean_b)
1320 : {
1321 12 : const double scale2 = scale_*scale_;
1322 12 : double inv_s2=0.;
1323 :
1324 12 : if(master) {
1325 24 : inv_s2 = 1./(sigma_[0]*sigma_[0] + scale2*sigma_mean2_[0]);
1326 12 : if(nrep_>1) multi_sim_comm.Sum(inv_s2);
1327 : }
1328 12 : comm.Sum(inv_s2);
1329 :
1330 : double w_tmp = 0.;
1331 36 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(w_tmp)
1332 : {
1333 24 : #pragma omp for reduction( + : w_tmp)
1334 : for(unsigned i=0; i<narg; ++i) {
1335 66 : const double dev = scale_*mean[i]-parameters[i]+offset_;
1336 22 : const double mult = dev*scale_*inv_s2;
1337 44 : setOutputForce(i, -kbt_*dmean_x[i]*mult);
1338 44 : w_tmp += kbt_*dmean_b[i]*mult;
1339 : }
1340 : }
1341 :
1342 12 : if(do_reweight_) {
1343 0 : setOutputForce(narg, -w_tmp);
1344 0 : getPntrToComponent("biasDer")->set(w_tmp);
1345 : }
1346 12 : }
1347 :
1348 52 : void Metainference::getEnergyForceGJE(const vector<double> &mean, const vector<double> &dmean_x,
1349 : const vector<double> &dmean_b)
1350 : {
1351 52 : const double scale2 = scale_*scale_;
1352 104 : vector<double> inv_s2(sigma_.size(),0.);
1353 :
1354 52 : if(master) {
1355 3848 : for(unsigned i=0; i<sigma_.size(); ++i) inv_s2[i] = 1./(sigma_[i]*sigma_[i] + scale2*sigma_mean2_[i]);
1356 52 : if(nrep_>1) multi_sim_comm.Sum(&inv_s2[0],sigma_.size());
1357 : }
1358 104 : comm.Sum(&inv_s2[0],sigma_.size());
1359 :
1360 : double w_tmp = 0.;
1361 156 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(w_tmp)
1362 : {
1363 104 : #pragma omp for reduction( + : w_tmp)
1364 : for(unsigned i=0; i<narg; ++i) {
1365 7644 : const double dev = scale_*mean[i]-parameters[i]+offset_;
1366 5096 : const double mult = dev*scale_*inv_s2[i];
1367 5096 : setOutputForce(i, -kbt_*dmean_x[i]*mult);
1368 5096 : w_tmp += kbt_*dmean_b[i]*mult;
1369 : }
1370 : }
1371 :
1372 52 : if(do_reweight_) {
1373 52 : setOutputForce(narg, -w_tmp);
1374 104 : getPntrToComponent("biasDer")->set(w_tmp);
1375 : }
1376 52 : }
1377 :
1378 12 : void Metainference::getEnergyForceMIGEN(const vector<double> &mean, const vector<double> &dmean_x, const vector<double> &dmean_b)
1379 : {
1380 24 : vector<double> inv_s2(sigma_.size(),0.);
1381 24 : vector<double> dev(sigma_.size(),0.);
1382 24 : vector<double> dev2(sigma_.size(),0.);
1383 :
1384 72 : for(unsigned i=0; i<sigma_.size(); ++i) {
1385 24 : inv_s2[i] = 1./sigma_mean2_[i];
1386 24 : if(master) {
1387 48 : dev[i] = (mean[i]-ftilde_[i]);
1388 24 : dev2[i] = dev[i]*dev[i];
1389 : }
1390 : }
1391 12 : if(master&&nrep_>1) {
1392 0 : multi_sim_comm.Sum(&dev[0],dev.size());
1393 0 : multi_sim_comm.Sum(&dev2[0],dev2.size());
1394 : }
1395 12 : comm.Sum(&dev[0],dev.size());
1396 12 : comm.Sum(&dev2[0],dev2.size());
1397 :
1398 : double dene_b = 0.;
1399 36 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(dene_b)
1400 : {
1401 24 : #pragma omp for reduction( + : dene_b)
1402 : for(unsigned i=0; i<narg; ++i) {
1403 88 : const double dene_x = kbt_*inv_s2[i]*dmean_x[i]*dev[i];
1404 22 : dene_b += kbt_*inv_s2[i]*dmean_b[i]*dev[i];
1405 22 : setOutputForce(i, -dene_x);
1406 : }
1407 : }
1408 :
1409 12 : if(do_reweight_) {
1410 0 : setOutputForce(narg, -dene_b);
1411 0 : getPntrToComponent("biasDer")->set(dene_b);
1412 : }
1413 12 : }
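
In the generic noise model the atomistic forces come only from the harmonic coupling between the replica average and the auxiliary variables ftilde_, which are sampled in doMonteCarlo() rather than propagated by forces. Reconstructed from the loop above:

  E_{\rm coupling} = k_B T \sum_i
      \frac{\bigl(\langle x_i\rangle - \tilde f_i\bigr)^2}{2\,\sigma_{{\rm mean},i}^2},
  \qquad
  F_{x_i} = -k_B T\,
      \frac{\langle x_i\rangle - \tilde f_i}{\sigma_{{\rm mean},i}^2}\,
      \frac{\partial\langle x_i\rangle}{\partial x_i},

which is exactly dene_x = kbt_*inv_s2[i]*dmean_x[i]*dev[i] with the sign flipped by setOutputForce.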
1414 :
1415 178 : void Metainference::get_weights(const unsigned iselect, double &fact, double &var_fact)
1416 : {
1417 178 : const double dnrep = static_cast<double>(nrep_);
1418 178 : const double ave_fact = 1.0/dnrep;
1419 :
1420 : double norm = 0.0;
1421 :
1422 : // calculate the weights either from the bias (REWEIGHT)
1423 178 : if(do_reweight_) {
1424 148 : vector<double> bias(nrep_,0);
1425 148 : if(master) {
1426 222 : bias[replica_] = getArgument(narg);
1427 148 : if(nrep_>1) multi_sim_comm.Sum(&bias[0], nrep_);
1428 : }
1429 296 : comm.Sum(&bias[0], nrep_);
1430 :
1431 296 : const double maxbias = *(std::max_element(bias.begin(), bias.end()));
1432 444 : for(unsigned i=0; i<nrep_; ++i) {
1433 592 : bias[i] = exp((bias[i]-maxbias)/kbt_);
1434 296 : norm += bias[i];
1435 : }
1436 :
1437 : // accumulate weights
1438 148 : const double decay = 1./static_cast<double> (average_weights_stride_);
1439 296 : if(!firstTimeW[iselect]) {
1440 264 : for(unsigned i=0; i<nrep_; ++i) {
1441 792 : const double delta=bias[i]/norm-average_weights_[iselect][i];
1442 264 : average_weights_[iselect][i]+=decay*delta;
1443 : }
1444 : } else {
1445 : firstTimeW[iselect] = false;
1446 48 : for(unsigned i=0; i<nrep_; ++i) {
1447 64 : average_weights_[iselect][i] = bias[i]/norm;
1448 : }
1449 : }
1450 :
1451 : // set average back into bias and set norm to one
1452 592 : for(unsigned i=0; i<nrep_; ++i) bias[i] = average_weights_[iselect][i];
1453 : // set local weight, norm and weight variance
1454 296 : fact = bias[replica_];
1455 : norm = 1.0;
1456 444 : for(unsigned i=0; i<nrep_; ++i) var_fact += (bias[i]/norm-ave_fact)*(bias[i]/norm-ave_fact);
1457 296 : getPntrToComponent("weight")->set(fact);
1458 : } else {
1459 : // or use uniform arithmetic weights, one over the number of replicas
1460 : norm = dnrep;
1461 30 : fact = 1.0/norm;
1462 : }
1463 178 : }
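
The weight accumulation above is a plain exponential moving average with rate decay = 1/average_weights_stride_, i.e. the AVERAGING time. A minimal standalone sketch (all values hypothetical; stride stands in for average_weights_stride_):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const double kbt = 2.494;                  // kJ/mol at 300 K
  const unsigned stride = 200;               // assumed AVERAGING value
  std::vector<double> avg;                   // plays the role of average_weights_[iselect]
  bool first = true;

  auto update = [&](const std::vector<double>& bias) {
    if(avg.empty()) avg.assign(bias.size(), 0.);
    // subtract the maximum before exponentiating, as in the listing
    const double maxb = *std::max_element(bias.begin(), bias.end());
    std::vector<double> w(bias.size());
    double norm = 0.;
    for(std::size_t i=0; i<bias.size(); ++i) {
      w[i] = std::exp((bias[i]-maxb)/kbt);
      norm += w[i];
    }
    const double decay = 1./stride;
    for(std::size_t i=0; i<w.size(); ++i) {
      if(first) avg[i] = w[i]/norm;          // first step: no history yet
      else      avg[i] += decay*(w[i]/norm - avg[i]);
    }
    first = false;
  };

  update({10.0, 12.0, 11.0});                // one bias value per replica
  update({10.5, 11.5, 11.0});
  for(double a : avg) std::printf("%.6f ", a);
  std::printf("\n");
}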
1464 :
1465 178 : void Metainference::get_sigma_mean(const unsigned iselect, const double fact, const double var_fact, const vector<double> &mean)
1466 : {
1467 178 : const double dnrep = static_cast<double>(nrep_);
1468 178 : const double ave_fact = 1.0/dnrep;
1469 :
1470 356 : vector<double> sigma_mean2_tmp(sigma_mean2_.size(), 0.);
1471 :
1472 178 : if(do_optsigmamean_>0) {
1473 : // remove first entry of the history vector
1474 0 : if(sigma_mean2_last_[iselect][0].size()==optsigmamean_stride_&&optsigmamean_stride_>0)
1475 0 : for(unsigned i=0; i<narg; ++i) sigma_mean2_last_[iselect][i].erase(sigma_mean2_last_[iselect][i].begin());
1476 : /* this is the current estimate of sigma mean for each argument;
1477 : there is one per argument in any case because the maximum
1478 : among them is used in the GAUSS/OUTLIERS case */
1479 0 : vector<double> sigma_mean2_now(narg,0);
1480 0 : if(do_reweight_) {
1481 0 : if(master) {
1482 0 : for(unsigned i=0; i<narg; ++i) {
1483 0 : double tmp1 = (fact*getArgument(i)-ave_fact*mean[i])*(fact*getArgument(i)-ave_fact*mean[i]);
1484 0 : double tmp2 = -2.*mean[i]*(fact-ave_fact)*(fact*getArgument(i)-ave_fact*mean[i]);
1485 0 : sigma_mean2_now[i] = tmp1 + tmp2;
1486 : }
1487 0 : if(nrep_>1) multi_sim_comm.Sum(&sigma_mean2_now[0], narg);
1488 : }
1489 0 : comm.Sum(&sigma_mean2_now[0], narg);
1490 0 : for(unsigned i=0; i<narg; ++i) sigma_mean2_now[i] = dnrep/(dnrep-1.)*(sigma_mean2_now[i] + mean[i]*mean[i]*var_fact);
1491 : } else {
1492 0 : if(master) {
1493 0 : for(unsigned i=0; i<narg; ++i) {
1494 0 : double tmp = getArgument(i)-mean[i];
1495 0 : sigma_mean2_now[i] = fact*tmp*tmp;
1496 : }
1497 0 : if(nrep_>1) multi_sim_comm.Sum(&sigma_mean2_now[0], narg);
1498 : }
1499 0 : comm.Sum(&sigma_mean2_now[0], narg);
1500 0 : for(unsigned i=0; i<narg; ++i) sigma_mean2_now[i] /= dnrep;
1501 : }
1502 :
1503 : // add sigma_mean2 to history
1504 0 : if(optsigmamean_stride_>0) {
1505 0 : for(unsigned i=0; i<narg; ++i) sigma_mean2_last_[iselect][i].push_back(sigma_mean2_now[i]);
1506 : } else {
1507 0 : for(unsigned i=0; i<narg; ++i) if(sigma_mean2_now[i] > sigma_mean2_last_[iselect][i][0]) sigma_mean2_last_[iselect][i][0] = sigma_mean2_now[i];
1508 : }
1509 :
1510 0 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
1511 0 : for(unsigned i=0; i<narg; ++i) {
1512 : /* set to the maximum in the history vector */
1513 0 : sigma_mean2_tmp[i] = *max_element(sigma_mean2_last_[iselect][i].begin(), sigma_mean2_last_[iselect][i].end());
1514 : /* the standard error of the mean */
1515 0 : valueSigmaMean[i]->set(sqrt(sigma_mean2_tmp[i]));
1516 0 : if(noise_type_==GENERIC) {
1517 0 : sigma_min_[i] = sqrt(sigma_mean2_tmp[i]);
1518 0 : if(sigma_[i] < sigma_min_[i]) sigma_[i] = sigma_min_[i];
1519 : }
1520 : }
1521 0 : } else if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
1522 : // find maximum for each data point
1523 : vector <double> max_values;
1524 0 : for(unsigned i=0; i<narg; ++i) max_values.push_back(*max_element(sigma_mean2_last_[iselect][i].begin(), sigma_mean2_last_[iselect][i].end()));
1525 : // find maximum across data points
1526 0 : const double max_now = *max_element(max_values.begin(), max_values.end());
1527 : // set new value
1528 0 : sigma_mean2_tmp[0] = max_now;
1529 0 : valueSigmaMean[0]->set(sqrt(sigma_mean2_tmp[0]));
1530 : }
1531 : // endif sigma optimization
1532 : } else {
1533 178 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
1534 2764 : for(unsigned i=0; i<narg; ++i) {
1535 8292 : sigma_mean2_tmp[i] = sigma_mean2_last_[iselect][i][0];
1536 5528 : valueSigmaMean[i]->set(sqrt(sigma_mean2_tmp[i]));
1537 : }
1538 66 : } else if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
1539 132 : sigma_mean2_tmp[0] = sigma_mean2_last_[iselect][0][0];
1540 132 : valueSigmaMean[0]->set(sqrt(sigma_mean2_tmp[0]));
1541 : }
1542 : }
1543 :
1544 178 : sigma_mean2_ = sigma_mean2_tmp;
1545 178 : }
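
In the branch exercised here the values are simply copied from sigma_mean2_last_, but the zero-count optimization branch above estimates the squared standard error of the replica mean. For the unweighted case it reduces to (a reconstruction; N = nrep_, x_i^{(r)} the value of argument i in replica r):

  \sigma_{{\rm mean},i}^2 = \frac{1}{N}\left[\frac{1}{N}
      \sum_{r=1}^{N}\bigl(x_i^{(r)} - \langle x_i\rangle\bigr)^2\right],

that is, the population variance across replicas divided by the number of replicas. The GAUSS/OUTLIERS variants then keep a single value, the maximum over data points (and over the history window when the sigma-mean optimization, do_optsigmamean_>0, is enabled).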
1546 :
1547 178 : void Metainference::replica_averaging(const double fact, vector<double> &mean, vector<double> &dmean_b)
1548 : {
1549 178 : if(master) {
1550 3112 : for(unsigned i=0; i<narg; ++i) mean[i] = fact*getArgument(i);
1551 178 : if(nrep_>1) multi_sim_comm.Sum(&mean[0], narg);
1552 : }
1553 356 : comm.Sum(&mean[0], narg);
1554 : // set the derivative of the mean with respect to the bias
1555 6222 : for(unsigned i=0; i<narg; ++i) dmean_b[i] = fact/kbt_*(getArgument(i)-mean[i])/static_cast<double>(average_weights_stride_);
1556 :
1557 : // initialize ftilde on the first step; it is used only by generic metainference
1558 178 : if(firstTime) {ftilde_ = mean; firstTime = false;}
1559 178 : }
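
dmean_b is the derivative of the weighted mean with respect to this replica's bias. With softmax weights w_r proportional to exp(V_r/k_B T), as built in get_weights(), one obtains (an editor's reconstruction):

  \frac{\partial\langle x_i\rangle}{\partial V_r}
      = \frac{w_r}{k_B T}\,\bigl(x_i^{(r)} - \langle x_i\rangle\bigr),

which is fact/kbt_*(getArgument(i)-mean[i]); the additional factor 1/average_weights_stride_ accounts for the moving average of the weights, in which only a fraction decay = 1/stride of the instantaneous weight enters at each step.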
1560 :
1561 0 : void Metainference::do_regression_zero(const vector<double> &mean)
1562 : {
1563 : // parameters[i] = scale_ * mean[i]: find scale_ with linear regression
1564 : double num = 0.0;
1565 : double den = 0.0;
1566 0 : for(unsigned i=0; i<parameters.size(); ++i) {
1567 0 : num += mean[i] * parameters[i];
1568 0 : den += mean[i] * mean[i];
1569 : }
1570 0 : if(den>0) {
1571 0 : scale_ = num / den;
1572 : } else {
1573 0 : scale_ = 1.0;
1574 : }
1575 0 : }
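
The closed form above is ordinary least squares with the intercept fixed at zero: minimizing \sum_i (d_i - s\,\langle x_i\rangle)^2 over s gives s = \sum_i \langle x_i\rangle d_i / \sum_i \langle x_i\rangle^2. A self-contained sketch with made-up data (the data are generated with an exact scale of 2, which the fit recovers):

#include <cstddef>
#include <cstdio>
#include <vector>

// zero-intercept least squares, mirroring do_regression_zero()
double regress_zero(const std::vector<double>& mean,
                    const std::vector<double>& parameters) {
  double num = 0., den = 0.;
  for(std::size_t i=0; i<mean.size(); ++i) {
    num += mean[i]*parameters[i];
    den += mean[i]*mean[i];
  }
  return den > 0. ? num/den : 1.0;   // fall back to 1 as in the listing
}

int main() {
  const std::vector<double> mean = {1., 2., 3.};
  const std::vector<double> parameters = {2., 4., 6.};
  std::printf("scale = %f\n", regress_zero(mean, parameters));   // prints 2.000000
}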
1576 :
1577 178 : void Metainference::calculate()
1578 : {
1579 : // get step
1580 178 : const long int step = getStep();
1581 :
1582 : unsigned iselect = 0;
1583 : // set the value of the selector for replica-exchange-like protocols
1584 178 : if(selector_.length()>0) iselect = static_cast<unsigned>(plumed.passMap[selector_]);
1585 :
1586 178 : double fact = 0.0;
1587 178 : double var_fact = 0.0;
1588 : // get weights for ensemble average
1589 178 : get_weights(iselect, fact, var_fact);
1590 : // calculate the mean
1591 178 : vector<double> mean(narg,0);
1592 : // this is the derivative of the mean with respect to the argument
1593 178 : vector<double> dmean_x(narg,fact);
1594 : // this is the derivative of the mean with respect to the bias
1595 178 : vector<double> dmean_b(narg,0);
1596 : // calculate it
1597 178 : replica_averaging(fact, mean, dmean_b);
1598 : // calculate sigma mean
1599 178 : get_sigma_mean(iselect, fact, var_fact, mean);
1600 : // in case of regression with zero intercept, calculate scale
1601 178 : if(doregres_zero_ && step%nregres_zero_==0) do_regression_zero(mean);
1602 :
1603 :
1604 : /* MONTE CARLO */
1605 178 : double ene = doMonteCarlo(mean);
1606 :
1607 : // calculate bias and forces
1608 178 : switch(noise_type_) {
1609 : case GAUSS:
1610 12 : getEnergyForceGJ(mean, dmean_x, dmean_b);
1611 : break;
1612 : case MGAUSS:
1613 52 : getEnergyForceGJE(mean, dmean_x, dmean_b);
1614 : break;
1615 : case OUTLIERS:
1616 54 : getEnergyForceSP(mean, dmean_x, dmean_b);
1617 : break;
1618 : case MOUTLIERS:
1619 48 : getEnergyForceSPE(mean, dmean_x, dmean_b);
1620 : break;
1621 : case GENERIC:
1622 12 : getEnergyForceMIGEN(mean, dmean_x, dmean_b);
1623 : break;
1624 : }
1625 :
1626 : // set value of the bias
1627 : setBias(ene);
1628 178 : }
1629 :
1630 19 : void Metainference::writeStatus()
1631 : {
1632 19 : sfile_.rewind();
1633 38 : sfile_.printField("time",getTimeStep()*getStep());
1634 : //nsel
1635 76 : for(unsigned i=0; i<sigma_mean2_last_.size(); i++) {
1636 : std::string msg_i,msg_j;
1637 19 : Tools::convert(i,msg_i);
1638 : vector <double> max_values;
1639 : // loop over the arguments (narg)
1640 2434 : for(unsigned j=0; j<narg; ++j) {
1641 2415 : Tools::convert(j,msg_j);
1642 4830 : std::string msg = msg_i+"_"+msg_j;
1643 2415 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
1644 9560 : sfile_.printField("sigmaMean_"+msg,sqrt(*max_element(sigma_mean2_last_[i][j].begin(), sigma_mean2_last_[i][j].end())));
1645 : } else {
1646 : // find maximum for each data point
1647 75 : max_values.push_back(*max_element(sigma_mean2_last_[i][j].begin(), sigma_mean2_last_[i][j].end()));
1648 : }
1649 : }
1650 19 : if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
1651 : // find maximum across data points
1652 12 : const double max_now = sqrt(*max_element(max_values.begin(), max_values.end()));
1653 6 : Tools::convert(0,msg_j);
1654 12 : std::string msg = msg_i+"_"+msg_j;
1655 12 : sfile_.printField("sigmaMean_"+msg, max_now);
1656 : }
1657 : }
1658 4811 : for(unsigned i=0; i<sigma_.size(); ++i) {
1659 : std::string msg;
1660 2396 : Tools::convert(i,msg);
1661 4792 : sfile_.printField("sigma_"+msg,sigma_[i]);
1662 : }
1663 19 : if(noise_type_==GENERIC) {
1664 5 : for(unsigned i=0; i<ftilde_.size(); ++i) {
1665 : std::string msg;
1666 2 : Tools::convert(i,msg);
1667 4 : sfile_.printField("ftilde_"+msg,ftilde_[i]);
1668 : }
1669 : }
1670 38 : sfile_.printField("scale0_",scale_);
1671 38 : sfile_.printField("offset0_",offset_);
1672 76 : for(unsigned i=0; i<average_weights_.size(); i++) {
1673 : std::string msg_i;
1674 19 : Tools::convert(i,msg_i);
1675 57 : sfile_.printField("weight_"+msg_i,average_weights_[i][replica_]);
1676 : }
1677 19 : sfile_.printField();
1678 19 : sfile_.flush();
1679 19 : }
1680 :
1681 178 : void Metainference::update() {
1682 : // write status file
1683 178 : if(write_stride_>0&& (getStep()%write_stride_==0 || getCPT()) ) writeStatus();
1684 178 : }
1685 :
1686 : }
1687 5874 : }
1688 :