SHOGUN 6.1.3
VarDTCInferenceMethod.cpp
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (W) 2015 Wu Lin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 * This code is specifically adapted from the functions in varsgpLikelihood.m
 * and varsgpPredict.m.
 *
 * The reference paper is
 * Titsias, Michalis K.
 * "Variational learning of inducing variables in sparse Gaussian processes."
 * International Conference on Artificial Intelligence and Statistics. 2009.
 *
 */
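/* Background note (a sketch of the objective, not part of the original header):
 * this method maximizes the variational lower bound of Titsias (2009),
 *
 *   F_V = log N(y | m, sigma2*I + Qnn) - (0.5/sigma2) * tr(Knn - Qnn),
 *   Qnn = Knm * inv(Kmm) * Kmn,
 *
 * where Kmm is the m-by-m kernel matrix on the inducing points, Knm the n-by-m
 * cross-kernel matrix, and Knn the n-by-n training kernel matrix. The class
 * computes -F_V and its gradients with respect to the hyperparameters and the
 * inducing inputs.
 */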

#include <shogun/machine/gp/VarDTCInferenceMethod.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/mathematics/Math.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

CVarDTCInferenceMethod::CVarDTCInferenceMethod() : CSingleSparseInference()
{
    init();
}

CVarDTCInferenceMethod::CVarDTCInferenceMethod(CKernel* kern, CFeatures* feat,
    CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
    : CSingleSparseInference(kern, feat, m, lab, mod, lat)
{
    init();
}

void CVarDTCInferenceMethod::init()
{
    m_yy=0.0;
    m_f3=0.0;
    m_sigma2=0.0;
    m_trk=0.0;
    m_Tmm=SGMatrix<float64_t>();
    m_Tnm=SGMatrix<float64_t>();
    m_inv_Lm=SGMatrix<float64_t>();
    m_inv_La=SGMatrix<float64_t>();
    m_Knm_inv_Lm=SGMatrix<float64_t>();

    SG_ADD(&m_yy, "yy", "yy", MS_NOT_AVAILABLE);
    SG_ADD(&m_f3, "f3", "f3", MS_NOT_AVAILABLE);
    SG_ADD(&m_sigma2, "sigma2", "sigma2", MS_NOT_AVAILABLE);
    SG_ADD(&m_trk, "trk", "trk", MS_NOT_AVAILABLE);
    SG_ADD(&m_Tmm, "Tmm", "Tmm", MS_NOT_AVAILABLE);
    SG_ADD(&m_Tnm, "Tnm", "Tnm", MS_NOT_AVAILABLE);
    SG_ADD(&m_inv_Lm, "inv_Lm", "inv_Lm", MS_NOT_AVAILABLE);
    SG_ADD(&m_inv_La, "inv_La", "inv_La", MS_NOT_AVAILABLE);
    SG_ADD(&m_Knm_inv_Lm, "Knm_Inv_Lm", "Knm_Inv_Lm", MS_NOT_AVAILABLE);
}

CVarDTCInferenceMethod::~CVarDTCInferenceMethod()
{
}

void CVarDTCInferenceMethod::compute_gradient()
{
    CInference::compute_gradient();

    if (!m_gradient_update)
    {
        update_deriv();
        m_gradient_update=true;
        update_parameter_hash();
    }
}

void CVarDTCInferenceMethod::update()
{
    SG_DEBUG("entering\n");

    CInference::update();
    update_chol();
    update_alpha();
    m_gradient_update=false;
    update_parameter_hash();

    SG_DEBUG("leaving\n");
}

CVarDTCInferenceMethod* CVarDTCInferenceMethod::obtain_from_generic(
    CInference* inference)
{
    if (inference==NULL)
        return NULL;

    //NOTE: the enum constant below is reconstructed from context; the check
    //rejects any inference object that does not report the VarDTC type
    if (inference->get_inference_type()!=INF_KL_SPARSE_REGRESSION)
        SG_SERROR("Provided inference is not of type CVarDTCInferenceMethod!\n")

    SG_REF(inference);
    return (CVarDTCInferenceMethod*)inference;
}

void CVarDTCInferenceMethod::check_members() const
{
    CSingleSparseInference::check_members();

    REQUIRE(m_model->get_model_type()==LT_GAUSSIAN,
        "VarDTC inference method can only use Gaussian likelihood function\n")
    REQUIRE(m_labels->get_label_type()==LT_REGRESSION, "Labels must be of type "
        "CRegressionLabels\n")
}

SGVector<float64_t> CVarDTCInferenceMethod::get_diagonal_vector()
{
    SG_NOTIMPLEMENTED;
    //the inference method does not need to use this
    return SGVector<float64_t>();
}

float64_t CVarDTCInferenceMethod::get_negative_log_marginal_likelihood()
{
    if (parameter_hash_changed())
        update();

    Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows,
        m_inv_La.num_cols);

    //F012 =-(model.n-model.m)*model.Likelihood.logtheta-0.5*model.n*log(2*pi)-(0.5/sigma2)*(model.yy)-sum(log(diag(La)));
    float64_t neg_f012=0.5*(m_ktru.num_cols-m_ktru.num_rows)*CMath::log(m_sigma2)+
        0.5*m_ktru.num_cols*CMath::log(2.0*CMath::PI)+
        0.5*m_yy/(m_sigma2)-eigen_inv_La.diagonal().array().log().sum();

    //F3 = (0.5/sigma2)*(yKnmInvLmInvLa*yKnmInvLmInvLa');
    float64_t neg_f3=-m_f3;

    //model.TrKnn = sum(model.diagKnn);
    //TrK = - (0.5/sigma2)*(model.TrKnn - sum(diag(C)) );
    float64_t neg_trk=-m_trk;

    //F = F012 + F3 + TrK;
    //F = - F;
    return neg_f012+neg_f3+neg_trk;
}
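/* Sign bookkeeping for the return value above (derivation note, not in the
 * original source): the method returns -F = -(F012 + F3 + TrK). Since invLa
 * is the inverse of the upper-triangular La, diag(invLa) = 1./diag(La), so
 * sum(log(diag(La))) = -sum(log(diag(invLa))); this is why the log-sum over
 * eigen_inv_La enters neg_f012 with a minus sign.
 */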

void CVarDTCInferenceMethod::update_chol()
{
    // get the sigma variable from the Gaussian likelihood model
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);
    m_sigma2=sigma*sigma;

    //m-by-m matrix
    Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
    //m-by-n matrix
    Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
    Map<VectorXd> eigen_ktrtr_diag(m_ktrtr_diag.vector, m_ktrtr_diag.vlen);

    //Lm = chol(model.Kmm + model.jitter*eye(model.m))
    LLT<MatrixXd> Luu(eigen_kuu*CMath::exp(m_log_scale*2.0)+CMath::exp(m_log_ind_noise)*MatrixXd::Identity(
        m_kuu.num_rows, m_kuu.num_cols));
    m_inv_Lm=SGMatrix<float64_t>(Luu.rows(), Luu.cols());
    Map<MatrixXd> eigen_inv_Lm(m_inv_Lm.matrix, m_inv_Lm.num_rows, m_inv_Lm.num_cols);
    //invLm = Lm\eye(model.m);
    eigen_inv_Lm=Luu.matrixU().solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));

    m_Knm_inv_Lm=SGMatrix<float64_t>(m_ktru.num_cols, m_ktru.num_rows);
    Map<MatrixXd> eigen_Knm_inv_Lm(m_Knm_inv_Lm.matrix, m_Knm_inv_Lm.num_rows, m_Knm_inv_Lm.num_cols);
    //KnmInvLm = model.Knm*invLm;
    eigen_Knm_inv_Lm=(eigen_ktru.transpose()*CMath::exp(m_log_scale*2.0))*eigen_inv_Lm;

    MatrixXd eigen_C(m_kuu.num_rows, m_kuu.num_cols);
    //C = KnmInvLm'*KnmInvLm;
    eigen_C=eigen_Knm_inv_Lm.transpose()*eigen_Knm_inv_Lm;

    m_inv_La=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
    Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);
    //A = sigma2*eye(model.m) + C;
    LLT<MatrixXd> chol_A(m_sigma2*MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols)+eigen_C);

    //La = chol(A);
    //invLa = La\eye(model.m);
    eigen_inv_La=chol_A.matrixU().solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));

    //L=-invLm*invLm' + sigma2*(invLm*invLa*invLa'*invLm');
    m_L=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    eigen_L=eigen_inv_Lm*(
        m_sigma2*eigen_inv_La*eigen_inv_La.transpose()-MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols)
        )*eigen_inv_Lm.transpose();

    //TrK = - (0.5/sigma2)*(model.TrKnn - sum(diag(C)) );
    m_trk=-0.5/(m_sigma2)*(eigen_ktrtr_diag.array().sum()*CMath::exp(m_log_scale*2.0)
        -eigen_C.diagonal().array().sum());
}
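/* Derivation note (not in the original source): unwinding the factorizations
 * above, invLm*invLm' = inv(Kmm) and invLa*invLa' = inv(A) with
 * A = sigma2*I + invLm'*Kmn*Knm*invLm, so
 *
 *   L = sigma2*invLm*inv(A)*invLm' - inv(Kmm)
 *     = inv(Kmm + Kmn*Knm/sigma2) - inv(Kmm),
 *
 * the standard DTC correction term for the predictive covariance, while
 *
 *   TrK = -(0.5/sigma2) * tr(Knn - Knm*inv(Kmm)*Kmn)
 *
 * is the trace regularizer that distinguishes the variational bound from
 * plain DTC.
 */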

void CVarDTCInferenceMethod::update_alpha()
{
    Map<MatrixXd> eigen_Knm_inv_Lm(m_Knm_inv_Lm.matrix, m_Knm_inv_Lm.num_rows, m_Knm_inv_Lm.num_cols);
    Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);
    Map<MatrixXd> eigen_inv_Lm(m_inv_Lm.matrix, m_inv_Lm.num_rows, m_inv_Lm.num_cols);

    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);

    //yKnmInvLm = (model.y'*KnmInvLm);
    //yKnmInvLmInvLa = yKnmInvLm*invLa;
    VectorXd y_cor=eigen_y-eigen_m;
    VectorXd eigen_y_Knm_inv_Lm_inv_La_transpose=eigen_inv_La.transpose()*(
        eigen_Knm_inv_Lm.transpose()*y_cor);
    //alpha = invLm*invLa*yKnmInvLmInvLa';
    m_alpha=SGVector<float64_t>(m_kuu.num_rows);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    eigen_alpha=eigen_inv_Lm*eigen_inv_La*eigen_y_Knm_inv_Lm_inv_La_transpose;

    m_yy=y_cor.dot(y_cor);
    //F3 = (0.5/sigma2)*(yKnmInvLmInvLa*yKnmInvLmInvLa');
    m_f3=0.5*eigen_y_Knm_inv_Lm_inv_La_transpose.dot(eigen_y_Knm_inv_Lm_inv_La_transpose)/m_sigma2;
}
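/* Derivation note (not in the original source): combining the triangular
 * solves, the weight vector computed above is
 *
 *   alpha = inv(sigma2*Kmm + Kmn*Knm) * Kmn * (y - m),
 *
 * so the sparse posterior mean at test points is mu* = K*m * alpha, matching
 * the DTC/projected-process predictive mean in Titsias (2009).
 */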

void CVarDTCInferenceMethod::update_deriv()
{
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<MatrixXd> eigen_inv_Lm(m_inv_Lm.matrix, m_inv_Lm.num_rows, m_inv_Lm.num_cols);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    //m-by-n matrix
    Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
    Map<MatrixXd> eigen_Knm_inv_Lm(m_Knm_inv_Lm.matrix, m_Knm_inv_Lm.num_rows, m_Knm_inv_Lm.num_cols);

    m_Tmm=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
    Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);

    m_Tnm=SGMatrix<float64_t>(m_ktru.num_cols, m_ktru.num_rows);
    Map<MatrixXd> eigen_Tnm(m_Tnm.matrix, m_Tnm.num_rows, m_Tnm.num_cols);

    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);
    m_sigma2=sigma*sigma;

    //C = KnmInvLm'*KnmInvLm; (recomputed here from the cached factor)
    MatrixXd eigen_C=eigen_Knm_inv_Lm.transpose()*eigen_Knm_inv_Lm;

    //invLmInvLa = invLm*invLa;
    //invA = invLmInvLa*invLmInvLa';
    //yKnmInvA = yKnmInvLmInvLa*invLmInvLa';
    //invKmm = invLm*invLm';
    //Tmm = sigma2*invA + yKnmInvA'*yKnmInvA;
    //Tmm = invKmm - Tmm;
    MatrixXd Tmm=-eigen_L-eigen_alpha*eigen_alpha.transpose();

    //Tnm = model.Knm*Tmm;
    eigen_Tnm=(eigen_ktru.transpose()*CMath::exp(m_log_scale*2.0))*Tmm;

    //Tmm = Tmm - (invLm*(C*invLm'))/sigma2;
    eigen_Tmm = Tmm - (eigen_inv_Lm*eigen_C*eigen_inv_Lm.transpose()/m_sigma2);

    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);

    //Tnm = Tnm + (model.y*yKnmInvA);
    eigen_Tnm += (eigen_y-eigen_m)*eigen_alpha.transpose();
}
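/* Derivation note (not in the original source): m_Tmm and m_Tnm cache the two
 * "sufficient statistics" of the bound's kernel gradients. For any kernel
 * hyperparameter theta,
 *
 *   d(-F)/dtheta = -0.5*tr(Tmm * dKmm/dtheta) - (1/sigma2)*tr(Tnm' * dKnm/dtheta)
 *                  + (0.5/sigma2)*sum(d diag(Knn)/dtheta),
 *
 * which is exactly what get_derivative_related_cov() below evaluates.
 */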

SGVector<float64_t> CVarDTCInferenceMethod::get_posterior_mean()
{
    SG_NOTIMPLEMENTED;
    //TODO: implement this method once I get time
    return SGVector<float64_t>();
}

SGMatrix<float64_t> CVarDTCInferenceMethod::get_posterior_covariance()
{
    SG_NOTIMPLEMENTED;
    //TODO: implement this method once I get time
    return SGMatrix<float64_t>();
}

SGVector<float64_t> CVarDTCInferenceMethod::get_derivative_wrt_likelihood_model(
    const TParameter* param)
{
    REQUIRE(!strcmp(param->m_name, "log_sigma"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        m_model->get_name(), param->m_name)

    SGVector<float64_t> dlik(1);

    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);
    Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);

    //yKnmInvLmInvLainvLa = yKnmInvLmInvLa*invLa';
    //sigma2aux = sigma2*sum(sum(invLa.*invLa)) + yKnmInvLmInvLainvLa*yKnmInvLmInvLainvLa';
    float64_t sigma2aux=m_sigma2*eigen_inv_La.cwiseProduct(eigen_inv_La).array().sum()+
        eigen_alpha.transpose()*(eigen_kuu*CMath::exp(m_log_scale*2.0)
        +CMath::exp(m_log_ind_noise)*MatrixXd::Identity(
        m_kuu.num_rows, m_kuu.num_cols))*eigen_alpha;
    //Dlik_neg = - (model.n-model.m) + model.yy/sigma2 - 2*F3 - sigma2aux - 2*TrK;

    dlik[0]=(m_ktru.num_cols-m_ktru.num_rows)-m_yy/m_sigma2+2.0*m_f3+sigma2aux+2.0*m_trk;
    return dlik;
}
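/* Note (not in the original source): dlik[0] is the negative of the MATLAB
 * reference quantity Dlik_neg, i.e. the gradient of the returned negative
 * bound -F with respect to the Gaussian likelihood's log_sigma; the REQUIRE
 * above rejects every other likelihood parameter.
 */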

SGVector<float64_t> CVarDTCInferenceMethod::get_derivative_wrt_inducing_features(
    const TParameter* param)
{
    //[DXu DXunm] = kernelSparseGradInd(model, Tmm, Tnm);
    //DXu_neg = DXu + DXunm/model.sigma2;
    Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);
    Map<MatrixXd> eigen_Tnm(m_Tnm.matrix, m_Tnm.num_rows, m_Tnm.num_cols);

    int32_t dim=m_inducing_features.num_rows;
    int32_t num_samples=m_inducing_features.num_cols;
    SGVector<float64_t> deriv_lat(dim*num_samples);
    deriv_lat.zero();

    m_lock->lock();
    CFeatures *inducing_features=get_inducing_features();
    //asymmetric part (related to xu and x)
    m_kernel->init(inducing_features, m_features);
    for(int32_t lat_idx=0; lat_idx<eigen_Tnm.cols(); lat_idx++)
    {
        Map<VectorXd> deriv_lat_col_vec(deriv_lat.vector+lat_idx*dim,dim);
        //p by n
        SGMatrix<float64_t> deriv_mat=m_kernel->get_parameter_gradient(param, lat_idx);
        Map<MatrixXd> eigen_deriv_mat(deriv_mat.matrix, deriv_mat.num_rows, deriv_mat.num_cols);
        //DXunm/model.sigma2;
        deriv_lat_col_vec+=eigen_deriv_mat*(-CMath::exp(m_log_scale*2.0)/m_sigma2*eigen_Tnm.col(lat_idx));
    }

    //symmetric part (related to xu and xu)
    m_kernel->init(inducing_features, inducing_features);
    for(int32_t lat_lidx=0; lat_lidx<eigen_Tmm.cols(); lat_lidx++)
    {
        Map<VectorXd> deriv_lat_col_vec(deriv_lat.vector+lat_lidx*dim,dim);
        //p by n
        SGMatrix<float64_t> deriv_mat=m_kernel->get_parameter_gradient(param, lat_lidx);
        Map<MatrixXd> eigen_deriv_mat(deriv_mat.matrix, deriv_mat.num_rows, deriv_mat.num_cols);
        //DXu
        deriv_lat_col_vec+=eigen_deriv_mat*(-CMath::exp(m_log_scale*2.0)*eigen_Tmm.col(lat_lidx));
    }
    SG_UNREF(inducing_features);
    m_lock->unlock();
    return deriv_lat;
}

SGVector<float64_t> CVarDTCInferenceMethod::get_derivative_wrt_inducing_noise(
    const TParameter* param)
{
    REQUIRE(param, "Param not set\n");
    REQUIRE(!strcmp(param->m_name, "log_inducing_noise"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        get_name(), param->m_name)

    Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);

    SGVector<float64_t> result(1);
    result[0]=-0.5*CMath::exp(m_log_ind_noise)*eigen_Tmm.diagonal().array().sum();
    return result;
}
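/* Derivation note (not in the original source): the inducing (jitter) noise
 * enters the bound only through Kmm + exp(log_inducing_noise)*I, so by the
 * kernel-gradient formula cached in m_Tmm,
 *
 *   d(-F)/d log_inducing_noise = -0.5 * exp(log_inducing_noise) * tr(Tmm),
 *
 * which is the single entry computed above.
 */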

float64_t CVarDTCInferenceMethod::get_derivative_related_cov(SGVector<float64_t> ddiagKi,
    SGMatrix<float64_t> dKuui, SGMatrix<float64_t> dKui)
{
    Map<VectorXd> eigen_ddiagKi(ddiagKi.vector, ddiagKi.vlen);
    Map<MatrixXd> eigen_dKuui(dKuui.matrix, dKuui.num_rows, dKuui.num_cols);
    Map<MatrixXd> eigen_dKui(dKui.matrix, dKui.num_rows, dKui.num_cols);

    Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);
    Map<MatrixXd> eigen_Tnm(m_Tnm.matrix, m_Tnm.num_rows, m_Tnm.num_cols);

    //[Dkern Dkernnm DTrKnn] = kernelSparseGradHyp(model, Tmm, Tnm);
    //Dkern_neg = 0.5*Dkern + Dkernnm/model.sigma2 - (0.5/model.sigma2)*DTrKnn;
    float64_t dkern= -0.5*eigen_dKuui.cwiseProduct(eigen_Tmm).sum()
        -eigen_dKui.cwiseProduct(eigen_Tnm.transpose()).sum()/m_sigma2
        +0.5*eigen_ddiagKi.array().sum()/m_sigma2;
    return dkern;
}

SGVector<float64_t> CVarDTCInferenceMethod::get_derivative_wrt_mean(
    const TParameter* param)
{
    REQUIRE(param, "Param not set\n");
    SGVector<float64_t> result;
    int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
    result=SGVector<float64_t>(len);

    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    //m-by-n matrix
    Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);

    for (index_t i=0; i<result.vlen; i++)
    {
        SGVector<float64_t> dmu=m_mean->get_parameter_derivative(m_features, param, i);
        Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);

        result[i]=eigen_dmu.dot(eigen_ktru.transpose()*CMath::exp(m_log_scale*2.0)
            *eigen_alpha+(eigen_m-eigen_y))/m_sigma2;
    }
    return result;
}

void CVarDTCInferenceMethod::register_minimizer(Minimizer* minimizer)
{
    SG_WARNING("The method does not require a minimizer. The provided minimizer will not be used.\n");
}