SHOGUN  6.1.3
KLInference.cpp
Go to the documentation of this file.
1  /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Wu Lin
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  *
30  * This code specifically adapted from function in approxKL.m and infKL.m
31  */
32 
34 
39 
40 
41 using namespace Eigen;
42 
43 namespace shogun
44 {
45 
46 #ifndef DOXYGEN_SHOULD_SKIP_THIS
// Cost-function adapter: exposes a CKLInference object's negative log marginal
// likelihood (as a function of the variational parameter vector m_alpha) to a
// FirstOrderMinimizer. Hidden from Doxygen; internal use only.
47 class KLInferenceCostFunction: public FirstOrderCostFunction
48 {
49 public:
50  KLInferenceCostFunction():FirstOrderCostFunction() { init(); }
51  virtual ~KLInferenceCostFunction() { SG_UNREF(m_obj); }
 // Attach the inference object whose NLML is to be minimized.
 // Takes a reference (SG_REF) on the new target before releasing the old one.
52  void set_target(CKLInference *obj)
53  {
54  REQUIRE(obj,"Obj must set\n");
55  if(m_obj!=obj)
56  {
57  SG_REF(obj);
58  SG_UNREF(m_obj);
59  m_obj=obj;
60  }
61  }
 // Detach the target; unreferences it only when is_unref is true (the caller
 // decides whether this adapter holds the last needed reference).
62  void unset_target(bool is_unref)
63  {
64  if(is_unref)
65  {
66  SG_UNREF(m_obj);
67  }
68  m_obj=NULL;
69  }
70 
 // Cost = negative log marginal likelihood at the current m_alpha.
 // Returns NaN when precompute() reports failure so the minimizer can back
 // off instead of consuming a bogus value.
71  virtual float64_t get_cost()
72  {
73  REQUIRE(m_obj,"Object not set\n");
74  bool status = m_obj->precompute();
75  if (!status)
76  return CMath::NOT_A_NUMBER;
77  float64_t nlml=m_obj->get_nlml_wrt_parameters();
78  return nlml;
79 
80  }
 // The optimization variable is the target's m_alpha vector; the gradient
 // buffer is (re)sized here to match its length.
81  virtual SGVector<float64_t> obtain_variable_reference()
82  {
83  REQUIRE(m_obj,"Object not set\n");
84  m_derivatives = SGVector<float64_t>((m_obj->m_alpha).vlen);
85  return m_obj->m_alpha;
86  }
 // Gradient of the NLML w.r.t. m_alpha, written into m_derivatives by the
 // target inference object.
87  virtual SGVector<float64_t> get_gradient()
88  {
89  REQUIRE(m_obj,"Object not set\n");
90  m_obj->get_gradient_of_nlml_wrt_parameters(m_derivatives);
91  return m_derivatives;
92  }
93 
94  virtual const char* get_name() const { return "KLInferenceCostFunction"; }
95 private:
96  SGVector<float64_t> m_derivatives;
 // Register members with the parameter framework and start with no target.
97  void init()
98  {
99  m_obj=NULL;
100  m_derivatives = SGVector<float64_t>();
101  SG_ADD(&m_derivatives, "KLInferenceCostFunction__m_derivatives",
102  "derivatives in KLInferenceCostFunction", MS_NOT_AVAILABLE);
103  SG_ADD((CSGObject **)&m_obj, "KLInferenceCostFunction__m_obj",
104  "obj in KLInferenceCostFunction", MS_NOT_AVAILABLE);
105  }
106  CKLInference *m_obj;
107 };
108 #endif //DOXYGEN_SHOULD_SKIP_THIS
109 
// Default constructor: registers the KL-specific members via init().
110 CKLInference::CKLInference() : CInference()
111 {
112  init();
113 }
114 
// Full constructor: forwards kernel/features/mean/labels/likelihood to the
// CInference base, then registers KL-specific members via init().
// NOTE(review): the opening signature line (CKLInference::CKLInference(CKernel*
// kern, ...) appears to have been stripped by the documentation extractor.
116  CFeatures* feat, CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod)
117  : CInference(kern, feat, m, lab, mod)
118 {
119  init();
121 }
122 
// check_variational_likelihood(CLikelihoodModel* mod): validates that the
// supplied likelihood supports variational Gaussian inference.
// NOTE(review): the signature line and the dynamic_cast producing `lik` appear
// to have been stripped by the documentation extractor.
124 {
125  REQUIRE(mod, "the likelihood model must not be NULL\n")
 // NOTE(review): the two message strings below are separate macro arguments,
 // so "%s" formats the "Please use ..." string and mod->get_name() is never
 // printed — the comma was probably intended to be string-literal
 // concatenation; confirm against upstream before changing the runtime text.
127  REQUIRE(lik,
128  "The provided likelihood model (%s) must support variational Gaussian inference. ",
129  "Please use a Variational Gaussian Likelihood model\n",
130  mod->get_name());
131 }
132 
// Destructor (empty body).
// NOTE(review): the signature line (CKLInference::~CKLInference()) appears to
// have been stripped by the documentation extractor.
134 {
137 }
138 
// init(): sets defaults for the kernel-matrix correction scheme (see
// update_init_helper()) and registers members with the parameter framework.
139 void CKLInference::init()
140 {
141  m_noise_factor=1e-10;
142  m_max_attempt=0;
143  m_exp_factor=2;
144  m_min_coeff_kernel=1e-5;
 // NOTE(review): the closing argument line of each SG_ADD call below appears
 // to have been stripped by the documentation extractor.
145  SG_ADD(&m_noise_factor, "noise_factor",
146  "The noise factor used for correcting Kernel matrix",
148  SG_ADD(&m_exp_factor, "exp_factor",
149  "The exponential factor used for increasing noise_factor",
151  SG_ADD(&m_max_attempt, "max_attempt",
152  "The max number of attempt to correct Kernel matrix",
154  SG_ADD(&m_min_coeff_kernel, "min_coeff_kernel",
155  "The minimum coeefficient of kernel matrix in LDLT factorization used to check whether the kernel matrix is positive definite or not",
157  SG_ADD(&m_s2, "s2",
158  "Variational parameter sigma2",
160  SG_ADD(&m_mu, "mu",
161  "Variational parameter mu and posterior mean",
163  SG_ADD(&m_Sigma, "Sigma",
164  "Posterior covariance matrix Sigma",
167 }
168 
// NOTE(review): the enclosing function's signature appears to have been
// stripped by the documentation extractor; the visible body is empty.
170 {
171 }
172 
// compute_gradient(): lazily refreshes derivative-related quantities —
// update_deriv() runs only while the gradient cache is stale.
// NOTE(review): the signature line and at least one preceding statement appear
// to have been stripped by the documentation extractor.
174 {
176 
177  if (!m_gradient_update)
178  {
180  update_deriv();
181  m_gradient_update=true;
183  }
184 }
185 
// update(): recomputes the posterior approximation (initialization, alpha,
// Cholesky factor) and marks the gradient cache stale so compute_gradient()
// will redo its work.
// NOTE(review): the signature line appears to have been stripped by the
// documentation extractor.
187 {
188  SG_DEBUG("entering\n");
189 
191  update_init();
192  update_alpha();
193  update_chol();
194  m_gradient_update=false;
196 
197  SG_DEBUG("leaving\n");
198 }
199 
// set_noise_factor(float64_t): diagonal jitter added to the kernel matrix for
// numerical correction; must be non-negative.
// NOTE(review): signature line stripped by the documentation extractor.
201 {
202  REQUIRE(noise_factor>=0, "The noise_factor %.20f should be non-negative\n", noise_factor);
203  m_noise_factor=noise_factor;
204 }
205 
207 {
208  REQUIRE(min_coeff_kernel>=0, "The min_coeff_kernel %.20f should be non-negative\n", min_coeff_kernel);
209  m_min_coeff_kernel=min_coeff_kernel;
210 }
211 
213 {
214  REQUIRE(max_attempt>=0, "The max_attempt %d should be non-negative. 0 means inifity attempts\n", max_attempt);
215  m_max_attempt=max_attempt;
216 }
217 
219 {
220  REQUIRE(exp_factor>1.0, "The exp_factor %f should be greater than 1.0.\n", exp_factor);
221  m_exp_factor=exp_factor;
222 }
223 
// update_init(): body and signature lines appear to have been stripped by the
// documentation extractor — presumably it delegates to update_init_helper();
// TODO confirm against upstream source.
225 {
227 }
228 
// update_init_helper(): returns an LDLT factorization of the scaled kernel
// matrix, repeatedly adding exponentially growing diagonal noise until the
// smallest diagonal factor exceeds m_min_coeff_kernel (i.e. the matrix is
// numerically positive definite), or erroring out after m_max_attempt tries.
// NOTE(review): the signature line and the declarations of eigen_K (an Eigen
// Map over m_ktrtr) and ldlt appear to have been stripped by the
// documentation extractor.
230 {
232 
233  eigen_K=eigen_K+m_noise_factor*MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols);
234 
236  ldlt.compute(eigen_K*CMath::exp(m_log_scale*2.0));
237 
238  float64_t attempt_count=0;
239  MatrixXd Kernel_D=ldlt.vectorD();
240  float64_t noise_factor=m_noise_factor;
241 
242  while (Kernel_D.minCoeff()<=m_min_coeff_kernel)
243  {
 // NOTE(review): as with check_variational_likelihood, the two SG_ERROR
 // message strings are separate arguments, so the format specifiers do not
 // line up with the values that follow — confirm against upstream.
244  if (m_max_attempt>0 && attempt_count>m_max_attempt)
245  SG_ERROR("The Kernel matrix is highly non-positive definite since the min_coeff_kernel is less than %.20f",
246  " even when adding %.20f noise to the diagonal elements at max %d attempts\n",
247  m_min_coeff_kernel, noise_factor, m_max_attempt);
248  attempt_count++;
249  float64_t pre_noise_factor=noise_factor;
250  noise_factor*=m_exp_factor;
251  //update the noise: eigen_K=eigen_K+noise_factor*(m_exp_factor^attempt_count)*Identity()
252  eigen_K=eigen_K+(noise_factor-pre_noise_factor)*MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols);
253  ldlt.compute(eigen_K*CMath::exp(m_log_scale*2.0));
254  Kernel_D=ldlt.vectorD();
255  }
256 
257  return ldlt;
258 }
259 
260 
// get_posterior_mean(): returns a copy of the variational mean m_mu.
// NOTE(review): the signature line and a preceding statement (likely a lazy
// update guard) appear to have been stripped by the documentation extractor.
262 {
264 
265  return SGVector<float64_t>(m_mu);
266 }
267 
// get_posterior_covariance(): body lines (presumably returning a copy of
// m_Sigma) appear to have been stripped by the documentation extractor —
// TODO confirm against upstream source.
269 {
271 
273 }
274 
// get_variational_likelihood(): returns the model cast to
// CVariationalGaussianLikelihood.
// NOTE(review): the signature line and the cast producing `lik` appear to
// have been stripped by the documentation extractor.
276 {
279  return lik;
280 }
281 
// set_model(CLikelihoodModel*): body lines appear to have been stripped by
// the documentation extractor — TODO confirm against upstream source.
283 {
287 }
288 
// get_negative_log_marginal_likelihood(): refreshes the approximation via
// update().
// NOTE(review): the signature line, a staleness guard, and the return
// statement appear to have been stripped by the documentation extractor.
290 {
292  update();
293 
295 }
296 
// get_derivative_wrt_likelihood_model(const TParameter*): derivative of the
// NLML w.r.t. a likelihood hyperparameter; mirrors the GPML code referenced
// in the inline %-comments (likKL in infKL.m).
// NOTE(review): the signature line, the guard that conditionally returns the
// empty vector, and the computation of lp_dhyp appear to have been stripped
// by the documentation extractor.
298 {
301  return SGVector<float64_t> ();
302 
303  //%lp_dhyp = likKL(v,lik,hyp.lik,y,K*post.alpha+m,[],[],j);
305  Map<VectorXd> eigen_lp_dhyp(lp_dhyp.vector, lp_dhyp.vlen);
306  SGVector<float64_t> result(1);
307  //%dnlZ.lik(j) = -sum(lp_dhyp);
308  result[0]=-eigen_lp_dhyp.sum();
309 
310  return result;
311 }
312 
// get_derivative_wrt_mean(const TParameter*): derivative of the NLML w.r.t.
// each element of a mean-function parameter, computed as -alpha' * dm where
// dm is the mean's parameter derivative (only the first half of m_alpha is
// mapped here).
// NOTE(review): the signature line and the lines declaring/assigning `dmu`
// inside the if/else appear to have been stripped by the documentation
// extractor.
314 {
315  // create eigen representation of K, Z, dfhat and alpha
316  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen/2);
317 
318  REQUIRE(param, "Param not set\n");
319  SGVector<float64_t> result;
320  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
321  result=SGVector<float64_t>(len);
322 
323  for (index_t i=0; i<result.vlen; i++)
324  {
326 
327  //%dm_t = feval(mean{:}, hyp.mean, x, j);
328  if (result.vlen==1)
330  else
332 
333  Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);
334 
335  //%dnlZ.mean(j) = -alpha'*dm_t;
336  result[i]=-eigen_alpha.dot(eigen_dmu);
337  }
338 
339  return result;
340 }
341 
// optimization(): wraps this object in a KLInferenceCostFunction and runs the
// registered first-order minimizer over m_alpha, returning the minimized NLML.
// The `cleanup` flag tells the cost function whether to drop its reference to
// `this` on teardown (only when external references remain, avoiding
// self-destruction mid-call).
// NOTE(review): the signature line, the creation of `cost_fun`, and the cast
// of m_minimizer to `opt` appear to have been stripped by the documentation
// extractor.
343 {
345  cost_fun->set_target(this);
346  bool cleanup=false;
347  if(this->ref_count()>1)
348  cleanup=true;
349 
351 
352  REQUIRE(opt, "FirstOrderMinimizer is required\n")
353  opt->set_cost_function(cost_fun);
354 
355  float64_t nlml_opt = opt->minimize();
356  opt->unset_cost_function(false);
357  cost_fun->unset_target(cleanup);
358 
359  SG_UNREF(cost_fun);
360  return nlml_opt;
361 }
362 
// register_minimizer(Minimizer*): accepts only first-order minimizers, since
// optimization() needs gradient support.
// NOTE(review): the signature line and the delegation/storage statement after
// the checks appear to have been stripped by the documentation extractor.
364 {
365  REQUIRE(minimizer, "Minimizer must set\n");
366  FirstOrderMinimizer* opt= dynamic_cast<FirstOrderMinimizer*>(minimizer);
367  REQUIRE(opt, "FirstOrderMinimizer is required\n");
369 }
370 
371 
// get_derivative_wrt_inference_method(const TParameter*): derivative of the
// NLML w.r.t. the kernel log_scale (the only supported parameter); the chain
// rule for the log-scale parameterization contributes the
// exp(2*log_scale)*2 factor. The runtime string contains the typo "nagative"
// — flagged here rather than changed, since it is program output.
// NOTE(review): the signature line and the statement initializing result[0]
// (before it is scaled) appear to have been stripped by the documentation
// extractor.
373 {
374  REQUIRE(!strcmp(param->m_name, "log_scale"), "Can't compute derivative of "
375  "the nagative log marginal likelihood wrt %s.%s parameter\n",
376  get_name(), param->m_name)
377 
379 
380  SGVector<float64_t> result(1);
381 
383  result[0]*=CMath::exp(m_log_scale*2.0)*2.0;
384 
385  return result;
386 }
387 
// get_derivative_wrt_kernel(const TParameter*): derivative of the NLML w.r.t.
// each element of a kernel parameter — the kernel's parameter gradient dK is
// pushed through get_derivative_related_cov() and scaled by exp(2*log_scale).
// NOTE(review): the signature line and the declaration of `dK` appear to have
// been stripped by the documentation extractor.
389 {
390  REQUIRE(param, "Param not set\n");
391  SGVector<float64_t> result;
392  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
393  result=SGVector<float64_t>(len);
394 
395  for (index_t i=0; i<result.vlen; i++)
396  {
398 
399  //dK = feval(covfunc{:},hyper,x,j);
 // Scalar parameters take the index-free overload; vector parameters are
 // queried element-by-element.
400  if (result.vlen==1)
401  dK=m_kernel->get_parameter_gradient(param);
402  else
403  dK=m_kernel->get_parameter_gradient(param, i);
404 
405  result[i]=get_derivative_related_cov(dK);
406  result[i]*=CMath::exp(m_log_scale*2.0);
407  }
408 
409  return result;
410 }
411 
// get_cholesky(): refreshes the approximation and returns a copy of the
// Cholesky factor m_L.
// NOTE(review): the signature line and a staleness guard around update()
// appear to have been stripped by the documentation extractor.
413 {
415  update();
416 
417  return SGMatrix<float64_t>(m_L);
418 }
419 
420 } /* namespace shogun */
421 
virtual const char * get_name() const =0
float64_t m_log_scale
Definition: Inference.h:485
virtual bool set_variational_distribution(SGVector< float64_t > mu, SGVector< float64_t > s2, const CLabels *lab)
virtual void update()
Definition: Inference.cpp:243
virtual float64_t optimization()
virtual void set_min_coeff_kernel(float64_t min_coeff_kernel)
virtual void update_parameter_hash()
Definition: SGObject.cpp:282
virtual SGVector< float64_t > get_first_derivative_wrt_hyperparameter(const TParameter *param) const =0
virtual SGVector< float64_t > get_derivative_wrt_kernel(const TParameter *param)
virtual void update()
virtual void set_max_attempt(index_t max_attempt)
int32_t index_t
Definition: common.h:72
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual CVariationalGaussianLikelihood * get_variational_likelihood() const
virtual SGVector< float64_t > get_derivative_wrt_mean(const TParameter *param)
virtual void update_alpha()=0
CKernel * m_kernel
Definition: Inference.h:464
The variational Gaussian Likelihood base class. The variational distribution is Gaussian.
virtual void update_chol()=0
virtual void set_model(CLikelihoodModel *mod)
Definition: SGMatrix.h:25
parameter struct
#define SG_ERROR(...)
Definition: SGIO.h:128
#define REQUIRE(x,...)
Definition: SGIO.h:181
An abstract class of the mean function.
Definition: MeanFunction.h:49
virtual float64_t get_negative_log_marginal_likelihood_helper()=0
virtual void register_minimizer(Minimizer *minimizer)
#define SG_REF(x)
Definition: SGObject.h:52
The class wraps the Shogun&#39;s C-style LBFGS minimizer.
virtual Eigen::LDLT< Eigen::MatrixXd, 0x1 > update_init_helper()
friend class KLInferenceCostFunction
Definition: KLInference.h:77
CFeatures * m_features
Definition: Inference.h:473
SGMatrix< float64_t > m_ktrtr
Definition: Inference.h:488
float64_t m_noise_factor
Definition: KLInference.h:244
CMeanFunction * m_mean
Definition: Inference.h:467
virtual void update_deriv()=0
virtual void check_variational_likelihood(CLikelihoodModel *mod) const
float64_t m_min_coeff_kernel
Definition: KLInference.h:241
virtual SGVector< float64_t > get_derivative_wrt_inference_method(const TParameter *param)
virtual SGMatrix< float64_t > get_posterior_covariance()
virtual void set_noise_factor(float64_t noise_factor)
CLabels * m_labels
Definition: Inference.h:476
virtual SGVector< float64_t > get_posterior_mean()
virtual void unset_cost_function(bool is_unref=true)
virtual float64_t get_derivative_related_cov(SGMatrix< float64_t > dK)=0
double float64_t
Definition: common.h:60
virtual void compute_gradient()
Definition: Inference.cpp:270
float64_t m_exp_factor
Definition: KLInference.h:247
index_t num_rows
Definition: SGMatrix.h:495
virtual void set_model(CLikelihoodModel *mod)
Definition: Inference.h:340
SGVector< float64_t > m_mu
Definition: KLInference.h:367
SGMatrix< float64_t > m_L
Definition: Inference.h:482
virtual SGVector< float64_t > get_derivative_wrt_likelihood_model(const TParameter *param)
index_t num_cols
Definition: SGMatrix.h:497
virtual const char * get_name() const
Definition: KLInference.h:103
virtual float64_t minimize()=0
SGMatrix< float64_t > m_Sigma
Definition: KLInference.h:370
virtual void update_init()
virtual SGMatrix< float64_t > get_cholesky()
virtual void register_minimizer(Minimizer *minimizer)
Definition: Inference.cpp:116
SGVector< float64_t > m_s2
Definition: KLInference.h:375
virtual SGVector< float64_t > get_parameter_derivative(const CFeatures *features, const TParameter *param, index_t index=-1)
Definition: MeanFunction.h:73
#define SG_UNREF(x)
Definition: SGObject.h:53
#define SG_DEBUG(...)
Definition: SGIO.h:106
virtual float64_t get_nlml_wrt_parameters()
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The Inference Method base class.
Definition: Inference.h:81
Minimizer * m_minimizer
Definition: Inference.h:461
The class Features is the base class of all feature objects.
Definition: Features.h:69
static float64_t exp(float64_t x)
Definition: Math.h:551
virtual SGMatrix< float64_t > get_parameter_gradient(const TParameter *param, index_t index=-1)
The Variational Likelihood base class.
The Kernel base class.
virtual float64_t get_negative_log_marginal_likelihood()
int32_t ref_count()
Definition: SGObject.cpp:193
virtual void update_approx_cov()=0
The minimizer base class.
Definition: Minimizer.h:43
#define SG_ADD(...)
Definition: SGObject.h:93
virtual void set_exp_factor(float64_t exp_factor)
virtual bool supports_derivative_wrt_hyperparameter() const =0
CLikelihoodModel * m_model
Definition: Inference.h:470
virtual void compute_gradient()
virtual bool parameter_hash_changed()
Definition: SGObject.cpp:296
The Likelihood model base class.
virtual void set_cost_function(FirstOrderCostFunction *fun)
index_t vlen
Definition: SGVector.h:571
SGVector< float64_t > m_alpha
Definition: Inference.h:479
The first order minimizer base class.

SHOGUN Machine Learning Toolbox - Documentation