SHOGUN  6.1.3
Inference.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (W) 2013 Heiko Strathmann
4  * Written (W) 2013 Roman Votyakov
5  * Written (W) 2012 Jacob Walker
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright notice, this
12  * list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * The views and conclusions contained in the software and documentation are those
29  * of the authors and should not be interpreted as representing official policies,
30  * either expressed or implied, of the Shogun Development Team.
31  *
32  */
33 #include <shogun/lib/config.h>
34 
35 
40 
41 using namespace shogun;
42 
// NOTE(review): this listing is a Doxygen scrape; the embedded numbers
// ("44", "45", ...) are Doxygen listing line numbers, not code. The
// signature line (Doxygen 43) was dropped — this is CInference's default
// constructor, which delegates all member/parameter setup to init().
44 {
45  init();
46 }
47 
// CInference::get_scale() — the signature line (Doxygen 48) was dropped by
// the scrape; the cross-reference index lists it as
// "virtual float64_t get_scale() const".
// The scale is stored internally as a log-value (m_log_scale) and is
// exponentiated on the way out; inverse of set_scale(), which stores
// CMath::log(scale).
49 {
50  return CMath::exp(m_log_scale);
51 }
52 
54 {
55  REQUIRE(scale>0, "Scale (%f) must be positive", scale);
56  m_log_scale=CMath::log(scale);
57 }
58 
// CInference::get_multiclass_E() — signature line dropped by the scrape;
// the index lists it as "virtual SGMatrix<float64_t> get_multiclass_E()".
// Returns the E matrix used for multi-class classification after bringing
// the posterior up to date.
// NOTE(review): Doxygen line 61 is missing here; upstream it is presumably
// a parameter_hash_changed() guard around update() — confirm against the
// original Inference.cpp.
60 {
62  update();
63 
// Copy-constructed SGMatrix — presumably a shallow, reference-counted view
// of m_E rather than a deep copy; confirm with SGMatrix semantics.
64  return SGMatrix<float64_t>(m_E);
65 }
66 
// CInference constructor taking all collaborators. The first signature line
// (Doxygen 67, presumably
// "CInference::CInference(CKernel* kernel, CFeatures* features,") was
// dropped by the scrape; only the continuation below survived.
// Registers parameters via init(), then stores each collaborator through
// its setter (the setters handle Shogun SG_REF/SG_UNREF reference counting).
68  CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
69 {
70  init();
71 
72  set_kernel(kernel);
73  set_features(features);
74  set_labels(labels);
75  set_model(model);
76  set_mean(mean);
77 }
78 
// CInference::~CInference() — destructor. The whole body (Doxygen lines
// 81-86) was dropped by the scrape; upstream it presumably SG_UNREFs the
// owned members (kernel, features, labels, mean, model, minimizer) —
// confirm against the original Inference.cpp before relying on this
// listing.
80 {
87 }
88 
// One-time member/parameter setup shared by all constructors.
89 void CInference::init()
90 {
// Register members with the SGObject parameter framework. SG_ADD is passed
// the *address* of each member, so registering before the assignments below
// is safe. NOTE(review): the registration order likely determines the
// serialized parameter layout — do not reorder without checking
// serialization compatibility.
91  SG_ADD((CSGObject**)&m_kernel, "kernel", "Kernel", MS_AVAILABLE);
92  SG_ADD(&m_log_scale, "log_scale", "Kernel log scale", MS_AVAILABLE, GRADIENT_AVAILABLE);
93  SG_ADD((CSGObject**)&m_model, "likelihood_model", "Likelihood model",
94  MS_AVAILABLE);
95  SG_ADD((CSGObject**)&m_mean, "mean_function", "Mean function", MS_AVAILABLE);
96  SG_ADD((CSGObject**)&m_labels, "labels", "Labels", MS_NOT_AVAILABLE);
97  SG_ADD((CSGObject**)&m_features, "features", "Features", MS_NOT_AVAILABLE);
98  SG_ADD(&m_gradient_update, "gradient_update", "Whether gradients are updated", MS_NOT_AVAILABLE);
99 
100 
// Default state: no collaborators set, unit scale (log(1)=0), gradients
// marked stale.
101  m_kernel=NULL;
102  m_model=NULL;
103  m_labels=NULL;
104  m_features=NULL;
105  m_mean=NULL;
106  m_log_scale=0.0;
107  m_gradient_update=false;
108  m_minimizer=NULL;
109 
// Remaining registrations (minimizer plus the cached posterior quantities
// alpha, L and E).
110  SG_ADD((CSGObject**)&m_minimizer, "Inference__m_minimizer", "minimizer in Inference", MS_NOT_AVAILABLE);
111  SG_ADD(&m_alpha, "alpha", "alpha vector used in process mean calculation", MS_NOT_AVAILABLE);
112  SG_ADD(&m_L, "L", "upper triangular factor of Cholesky decomposition", MS_NOT_AVAILABLE);
113  SG_ADD(&m_E, "E", "the matrix used for multi classification", MS_NOT_AVAILABLE);
114 }
115 
117 {
118  REQUIRE(minimizer, "Minimizer must set\n");
119  if(minimizer!=m_minimizer)
120  {
121  SG_REF(minimizer);
123  m_minimizer=minimizer;
124  }
125 }
126 
// CInference::get_marginal_likelihood_estimate — importance-sampling
// estimate of the log marginal likelihood. The first signature line
// (Doxygen 127) was dropped by the scrape; the index lists the declaration
// as "float64_t get_marginal_likelihood_estimate(int32_t
// num_importance_samples=1, float64_t ridge_size=1e-15)".
// NOTE(review): several interior lines are missing from this listing
// (Doxygen 131, 137, 153, 155, 163, 170) — presumably the declarations of
// cov (get_posterior_covariance()), mean (get_posterior_mean()),
// scaled_kernel, the sg_memcpy byte count, the prior CGaussianDistribution,
// and the log_likelihood vector. Confirm against the original Inference.cpp.
128  int32_t num_importance_samples, float64_t ridge_size)
129 {
 /* sample from Gaussian approximation to q(f|y) */
132 
// Regularize the posterior covariance so Cholesky inside
// CGaussianDistribution succeeds. (cov's declaration, Doxygen 131, was
// dropped by the scrape.)
 /* add ridge */
134  for (index_t i=0; i<cov.num_rows; ++i)
135  cov(i,i)+=ridge_size;
136 
138 
139  CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov);
140  SGMatrix<float64_t> samples=post_approx->sample(num_importance_samples);
141 
 /* evaluate q(f^i|y), p(f^i|\theta), p(y|f^i), i.e.,
 * log pdf of approximation, prior and likelihood */
144 
 /* log pdf q(f^i|y) */
146  SGVector<float64_t> log_pdf_post_approx=post_approx->log_pdf_multiple(samples);
147 
 /* don't need the Gaussian anymore, free memory */
149  SG_UNREF(post_approx);
150  post_approx=NULL;
151 
 /* log pdf p(f^i|\theta), freeing memory afterwards. Scale kernel before.
  * The kernel is scaled by exp(2*log_scale), i.e. scale^2.
  * (scaled_kernel's declaration and the sg_memcpy size argument, Doxygen
  * 153/155, were dropped by the scrape.) */
154  sg_memcpy(scaled_kernel.matrix, m_ktrtr.matrix,
156  for (index_t i=0; i<m_ktrtr.num_rows*m_ktrtr.num_cols; ++i)
157  scaled_kernel.matrix[i]*=CMath::exp(m_log_scale*2.0);
158 
 /* add ridge */
160  for (index_t i=0; i<m_ktrtr.num_rows; ++i)
161  scaled_kernel(i,i)+=ridge_size;
163 
// Prior is a Gaussian with the mean function's mean and the scaled kernel
// (its "new CGaussianDistribution(" line, Doxygen 163, was dropped).
164  m_mean->get_mean_vector(m_features), scaled_kernel);
165  SGVector<float64_t> log_pdf_prior=prior->log_pdf_multiple(samples);
166  SG_UNREF(prior);
167  prior=NULL;
168 
 /* p(y|f^i) */
171  m_labels, samples);
172 
 /* combine probabilities */
174  ASSERT(log_likelihood.vlen==num_importance_samples);
175  ASSERT(log_likelihood.vlen==log_pdf_prior.vlen);
176  ASSERT(log_likelihood.vlen==log_pdf_post_approx.vlen);
// NOTE(review): SGVector copy construction presumably shares the buffer
// (ref-counted), so the loop below also overwrites log_likelihood — each
// element is read before being written, so this is harmless; confirm.
177  SGVector<float64_t> sum(log_likelihood);
178  for (index_t i=0; i<log_likelihood.vlen; ++i)
179  sum[i]=log_likelihood[i]+log_pdf_prior[i]-log_pdf_post_approx[i];
180 
 /* use log-sum-exp (in particular, log-mean-exp) trick to combine values */
182  return CMath::log_mean_exp(sum);
183 }
184 
// CInference::get_negative_log_marginal_likelihood_derivatives — computes,
// for each requested parameter, the gradient of the negative log marginal
// likelihood and returns them in a ref-counted map keyed by TParameter*.
// The signature lines (Doxygen 185-186) were dropped by the scrape; the
// index lists it as "virtual CMap<TParameter*, SGVector<float64_t>>*
// get_negative_log_marginal_likelihood_derivatives(CMap<TParameter*,
// CSGObject*>* parameters)".
// NOTE(review): Doxygen 192 (presumably a compute_gradient() call) and 197
// (the "result=" declaration line) are also missing — confirm upstream.
187 {
188  REQUIRE(params->get_num_elements(), "Number of parameters should be greater "
189  "than zero\n")
190 
192 
  // get number of derivatives
194  const index_t num_deriv=params->get_num_elements();
195 
  // create map of derivatives
198  new CMap<TParameter*, SGVector<float64_t> >(num_deriv, num_deriv);
199 
// Caller receives ownership of one reference.
200  SG_REF(result);
201 
// Each parameter's gradient is independent, so they are computed in
// parallel; insertion into the shared map is serialized below.
// NOTE(review): SG_SERROR inside an OpenMP parallel region aborts from a
// worker thread — acceptable here as it only fires on an unknown parameter.
202  #pragma omp parallel for
203  for (index_t i=0; i<num_deriv; i++)
204  {
205  CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(i);
206  SGVector<float64_t> gradient;
207 
// Dispatch on which object owns the parameter.
208  if(node->data == this)
209  {
  // try to find derivative wrt InferenceMethod.parameter
211  gradient=this->get_derivative_wrt_inference_method(node->key);
212  }
213  else if (node->data == this->m_model)
214  {
  // try to find derivative wrt LikelihoodModel.parameter
216  gradient=this->get_derivative_wrt_likelihood_model(node->key);
217  }
218  else if (node->data ==this->m_kernel)
219  {
  // try to find derivative wrt Kernel.parameter
221  gradient=this->get_derivative_wrt_kernel(node->key);
222  }
223  else if (node->data ==this->m_mean)
224  {
  // try to find derivative wrt MeanFunction.parameter
226  gradient=this->get_derivative_wrt_mean(node->key);
227  }
228  else
229  {
230  SG_SERROR("Can't compute derivative of negative log marginal "
231  "likelihood wrt %s.%s", node->data->get_name(), node->key->m_name);
232  }
233 
// CMap::add is not thread-safe; guard the shared map.
234  #pragma omp critical
235  {
236  result->add(node->key, gradient);
237  }
238  }
239 
240  return result;
241 }
242 
// CInference::update() — validates members then refreshes cached state.
// The signature line was dropped by the scrape; the index lists it as
// "virtual void update()".
// NOTE(review): Doxygen 246 is missing — presumably the
// update_train_kernel() call — confirm against the original Inference.cpp.
244 {
245  check_members();
247 }
248 
// CInference::check_members() — sanity-checks that all collaborators needed
// for inference are set and dimensionally consistent. The signature line
// was dropped by the scrape ("virtual void check_members() const").
// NOTE(review): the opening lines of several REQUIREs (Doxygen 252, 255,
// 257, 259) are missing; only their message continuations survived —
// presumably checks on m_features->get_num_vectors(),
// m_labels->get_num_labels(), and their equality. Confirm upstream.
250 {
251  REQUIRE(m_features, "Training features should not be NULL\n")
253  "Number of training features must be greater than zero\n")
254  REQUIRE(m_labels, "Labels should not be NULL\n")
256  "Number of labels must be greater than zero\n")
258  "Number of training vectors (%d) must match number of labels (%d)\n",
260  REQUIRE(m_kernel, "Kernel should not be NULL\n")
261  REQUIRE(m_mean, "Mean function should not be NULL\n")
262 }
263 
// CInference::update_train_kernel() — body (Doxygen 266-267) was dropped by
// the scrape; per the index entries for CKernel::init and
// get_kernel_matrix, it presumably initializes m_kernel on m_features and
// caches the kernel matrix in m_ktrtr — confirm against the original
// Inference.cpp.
265 {
268 }
269 
// CInference::compute_gradient() — signature line dropped by the scrape
// ("virtual void compute_gradient()").
// NOTE(review): Doxygen 272 is missing — presumably a
// "if (parameter_hash_changed())" guard making the update() call below
// conditional — confirm against the original Inference.cpp.
271 {
273  update();
274 }
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Kernel.cpp:97
float64_t m_log_scale
Definition: Inference.h:485
virtual void update_train_kernel()
Definition: Inference.cpp:264
virtual void update()
Definition: Inference.cpp:243
virtual ~CInference()
Definition: Inference.cpp:79
int32_t index_t
Definition: common.h:72
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual int32_t get_num_labels() const =0
CKernel * m_kernel
Definition: Inference.h:464
void scale(SGVector< T > &a, SGVector< T > &result, T alpha=1)
CMapNode< K, T > * get_node_ptr(int32_t index)
Definition: Map.h:247
virtual int32_t get_num_vectors() const =0
virtual void set_scale(float64_t scale)
Definition: Inference.cpp:53
#define REQUIRE(x,...)
Definition: SGIO.h:181
virtual SGVector< float64_t > get_mean_vector(const CFeatures *features) const =0
int32_t get_num_elements() const
Definition: Map.h:211
SGMatrix< float64_t > m_E
Definition: Inference.h:491
An abstract class of the mean function.
Definition: MeanFunction.h:49
std::enable_if<!std::is_same< T, complex128_t >::value, float64_t >::type mean(const Container< T > &a)
SGMatrix< float64_t > get_kernel_matrix()
virtual void set_labels(CLabels *lab)
Definition: Inference.h:323
#define SG_REF(x)
Definition: SGObject.h:52
CFeatures * m_features
Definition: Inference.h:473
SGMatrix< float64_t > m_ktrtr
Definition: Inference.h:488
virtual SGVector< float64_t > get_derivative_wrt_mean(const TParameter *param)=0
virtual SGMatrix< float64_t > get_posterior_covariance()=0
CMeanFunction * m_mean
Definition: Inference.h:467
virtual SGVector< float64_t > get_log_probability_fmatrix(const CLabels *lab, SGMatrix< float64_t > F) const
#define ASSERT(x)
Definition: SGIO.h:176
virtual SGMatrix< float64_t > get_multiclass_E()
Definition: Inference.cpp:59
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:124
CLabels * m_labels
Definition: Inference.h:476
virtual float64_t get_scale() const
Definition: Inference.cpp:48
double float64_t
Definition: common.h:60
virtual void compute_gradient()
Definition: Inference.cpp:270
The class CMap, a map based on a hash table. See: http://en.wikipedia.org/wiki/Hash_table ...
Definition: SGObject.h:42
index_t num_rows
Definition: SGMatrix.h:495
virtual void set_model(CLikelihoodModel *mod)
Definition: Inference.h:340
virtual void set_kernel(CKernel *kern)
Definition: Inference.h:289
virtual SGVector< float64_t > get_derivative_wrt_inference_method(const TParameter *param)=0
SGMatrix< float64_t > m_L
Definition: Inference.h:482
virtual SGVector< float64_t > get_posterior_mean()=0
index_t num_cols
Definition: SGMatrix.h:497
virtual SGVector< float64_t > log_pdf_multiple(SGMatrix< float64_t > samples) const
virtual void register_minimizer(Minimizer *minimizer)
Definition: Inference.cpp:116
virtual void set_features(CFeatures *feat)
Definition: Inference.h:272
virtual SGVector< float64_t > get_derivative_wrt_kernel(const TParameter *param)=0
#define SG_UNREF(x)
Definition: SGObject.h:53
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
Dense version of the well-known Gaussian probability distribution, defined as .
T sum(const Container< T > &a, bool no_diag=false)
int32_t add(const K &key, const T &data)
Definition: Map.h:101
float64_t get_marginal_likelihood_estimate(int32_t num_importance_samples=1, float64_t ridge_size=1e-15)
Definition: Inference.cpp:127
Minimizer * m_minimizer
Definition: Inference.h:461
The class Features is the base class of all feature objects.
Definition: Features.h:69
#define SG_SERROR(...)
Definition: SGIO.h:164
static float64_t exp(float64_t x)
Definition: Math.h:551
static float64_t log(float64_t v)
Definition: Math.h:714
The Kernel base class.
static T log_mean_exp(SGVector< T > values)
Definition: Math.h:1105
virtual CMap< TParameter *, SGVector< float64_t > > * get_negative_log_marginal_likelihood_derivatives(CMap< TParameter *, CSGObject * > *parameters)
Definition: Inference.cpp:186
The minimizer base class.
Definition: Minimizer.h:43
#define SG_ADD(...)
Definition: SGObject.h:93
CLikelihoodModel * m_model
Definition: Inference.h:470
virtual bool parameter_hash_changed()
Definition: SGObject.cpp:296
virtual void set_mean(CMeanFunction *m)
Definition: Inference.h:306
The Likelihood model base class.
index_t vlen
Definition: SGVector.h:571
SGVector< float64_t > m_alpha
Definition: Inference.h:479
virtual SGMatrix< float64_t > sample(int32_t num_samples, SGMatrix< float64_t > pre_samples=SGMatrix< float64_t >()) const
virtual void check_members() const
Definition: Inference.cpp:249
virtual SGVector< float64_t > get_derivative_wrt_likelihood_model(const TParameter *param)=0

SHOGUN Machine Learning Toolbox - Documentation