SHOGUN  6.1.3
SingleSparseInference.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (W) 2015 Wu Lin
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  *
30  */
31 
33 #ifdef USE_GPL_SHOGUN
34 #ifdef HAVE_NLOPT
35 #include <shogun/optimization/NLOPTMinimizer.h>
36 #endif //HAVE_NLOPT
37 #endif //USE_GPL_SHOGUN
38 
43 
44 using namespace shogun;
45 using namespace Eigen;
46 
47 namespace shogun
48 {
49 
50 #ifndef DOXYGEN_SHOULD_SKIP_THIS
51 
52 class SingleSparseInferenceCostFunction: public FirstOrderBoundConstraintsCostFunction
53 {
54 public:
55  SingleSparseInferenceCostFunction():FirstOrderBoundConstraintsCostFunction() { init(); }
56  virtual ~SingleSparseInferenceCostFunction() { SG_UNREF(m_obj); }
57  virtual const char* get_name() const { return "SingleSparseInferenceCostFunction"; }
58  void set_target(CSingleSparseInference *obj)
59  {
60  REQUIRE(obj,"Object not set\n");
61  if(obj!=m_obj)
62  {
63  SG_REF(obj);
64  SG_UNREF(m_obj);
65  m_obj=obj;
66  m_obj->check_fully_sparse();
67  REQUIRE(m_obj->m_fully_sparse,"Can not compute gradient\n");
68  }
69  }
70  void unset_target(bool is_unref)
71  {
72  if(is_unref)
73  {
74  SG_UNREF(m_obj);
75  }
76  m_obj=NULL;
77  }
78  virtual float64_t get_cost()
79  {
80  REQUIRE(m_obj,"Object not set\n");
81  float64_t nlz=m_obj->get_negative_log_marginal_likelihood();
82  return nlz;
83  }
84  virtual SGVector<float64_t> obtain_variable_reference()
85  {
86  REQUIRE(m_obj,"Object not set\n");
87  SGMatrix<float64_t>& lat_m=m_obj->m_inducing_features;
88  SGVector<float64_t> x(lat_m.matrix,lat_m.num_rows*lat_m.num_cols,false);
89  return x;
90  }
91  virtual SGVector<float64_t> get_gradient()
92  {
93  REQUIRE(m_obj,"Object not set\n");
94  m_obj->compute_gradient();
95  TParameter* param=m_obj->m_gradient_parameters->get_parameter("inducing_features");
96  SGVector<float64_t> derivatives=m_obj->get_derivative_wrt_inducing_features(param);
97  return derivatives;
98  }
99  virtual SGVector<float64_t> get_lower_bound()
100  {
101  REQUIRE(m_obj,"Object not set\n");
102  return m_obj->m_lower_bound;
103  }
104  virtual SGVector<float64_t> get_upper_bound()
105  {
106  REQUIRE(m_obj,"Object not set\n");
107  return m_obj->m_upper_bound;
108  }
109 private:
110  CSingleSparseInference *m_obj;
111  void init()
112  {
113  m_obj=NULL;
114  //The existing implementation in CSGObject::get_parameter_incremental_hash()
115  //can NOT deal with circular reference when parameter_hash_changed() is called
116  //SG_ADD((CSGObject **)&m_obj, "CSigleSparseInference__m_obj",
117  //"m_obj in SingleSparseInferenceCostFunction", MS_NOT_AVAILABLE);
118  }
119 };
120 #endif //DOXYGEN_SHOULD_SKIP_THIS
121 
123 {
124  init();
125 }
126 
128  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
129  : CSparseInference(kern, feat, m, lab, mod, lat)
130 {
131  init();
133 }
134 
135 void CSingleSparseInference::init()
136 {
137  m_fully_sparse=false;
139  SG_ADD(&m_fully_sparse, "fully_Sparse",
140  "whether the kernel support sparse inference", MS_NOT_AVAILABLE);
141  m_lock=new CLock();
142 
143  SG_ADD(&m_upper_bound, "upper_bound",
144  "upper bound of inducing features", MS_NOT_AVAILABLE);
145  SG_ADD(&m_lower_bound, "lower_bound",
146  "lower bound of inducing features", MS_NOT_AVAILABLE);
147  SG_ADD(&m_max_ind_iterations, "max_ind_iterations",
148  "max number of iterations used in inducing features optimization", MS_NOT_AVAILABLE);
149  SG_ADD(&m_ind_tolerance, "ind_tolerance",
150  "tolearance used in inducing features optimization", MS_NOT_AVAILABLE);
152  "opt_inducing_features", "whether optimize inducing features", MS_NOT_AVAILABLE);
153 
155  "inducing_minimizer", "Minimizer used in optimize inducing features", MS_NOT_AVAILABLE);
156 
158  m_ind_tolerance=1e-3;
162 }
163 
165 {
168 }
169 
171 {
173  delete m_lock;
174 }
175 
177 {
178  REQUIRE(m_kernel, "Kernel must be set first\n")
179  if (strstr(m_kernel->get_name(), "SparseKernel")!=NULL)
180  m_fully_sparse=true;
181  else
182  {
183  SG_WARNING( "The provided kernel does not support to optimize inducing features\n");
184  m_fully_sparse=false;
185  }
186 }
187 
189  const TParameter* param)
190 {
191  // the time complexity O(m^2*n) if the TO DO is done
192  REQUIRE(param, "Param not set\n");
193  REQUIRE(!(strcmp(param->m_name, "log_scale")
194  && strcmp(param->m_name, "log_inducing_noise")
195  && strcmp(param->m_name, "inducing_features")),
196  "Can't compute derivative of"
197  " the nagative log marginal likelihood wrt %s.%s parameter\n",
198  get_name(), param->m_name)
199 
200  if (!strcmp(param->m_name, "log_inducing_noise"))
201  // wrt inducing_noise
202  // compute derivative wrt inducing noise
203  return get_derivative_wrt_inducing_noise(param);
204  else if (!strcmp(param->m_name, "inducing_features"))
205  {
207  if (!m_fully_sparse)
208  {
209  int32_t dim=m_inducing_features.num_rows;
210  int32_t num_samples=m_inducing_features.num_cols;
211  res=SGVector<float64_t>(dim*num_samples);
212  SG_WARNING("Derivative wrt %s cannot be computed since the kernel does not support fully sparse inference\n",
213  param->m_name);
214  res.zero();
215  return res;
216  }
218  return res;
219  }
220 
221  // wrt scale
222  // clone kernel matrices
224  SGMatrix<float64_t> deriv_uu=m_kuu.clone();
225  SGMatrix<float64_t> deriv_tru=m_ktru.clone();
226 
227  // create eigen representation of kernel matrices
228  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
229  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_rows, deriv_uu.num_cols);
230  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_rows, deriv_tru.num_cols);
231 
232  // compute derivatives wrt scale for each kernel matrix
233  SGVector<float64_t> result(1);
234 
235  result[0]=get_derivative_related_cov(deriv_trtr, deriv_uu, deriv_tru);
236  result[0]*=CMath::exp(m_log_scale*2.0)*2.0;
237  return result;
238 }
239 
241  const TParameter* param)
242 {
243  REQUIRE(param, "Param not set\n");
244  SGVector<float64_t> result;
245  int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
246  result=SGVector<float64_t>(len);
247 
248  CFeatures *inducing_features=get_inducing_features();
249  for (index_t i=0; i<result.vlen; i++)
250  {
251  SGVector<float64_t> deriv_trtr;
252  SGMatrix<float64_t> deriv_uu;
253  SGMatrix<float64_t> deriv_tru;
254 
255  m_lock->lock();
257  //to reduce the time complexity
258  //the kernel object only computes diagonal elements of gradients wrt hyper-parameter
259  deriv_trtr=m_kernel->get_parameter_gradient_diagonal(param, i);
260 
261  m_kernel->init(inducing_features, inducing_features);
262  deriv_uu=m_kernel->get_parameter_gradient(param, i);
263 
264  m_kernel->init(inducing_features, m_features);
265  deriv_tru=m_kernel->get_parameter_gradient(param, i);
266  m_lock->unlock();
267 
268  // create eigen representation of derivatives
269  Map<VectorXd> ddiagKi(deriv_trtr.vector, deriv_trtr.vlen);
270  Map<MatrixXd> dKuui(deriv_uu.matrix, deriv_uu.num_rows,
271  deriv_uu.num_cols);
272  Map<MatrixXd> dKui(deriv_tru.matrix, deriv_tru.num_rows,
273  deriv_tru.num_cols);
274 
275  result[i]=get_derivative_related_cov(deriv_trtr, deriv_uu, deriv_tru);
276  result[i]*=CMath::exp(m_log_scale*2.0);
277  }
278  SG_UNREF(inducing_features);
279  return result;
280 }
281 
283 {
284  if (bound.vlen>1)
285  {
286  REQUIRE(m_inducing_features.num_rows>0, "Inducing features must set before this method is called\n");
288  "The length of inducing features (%dx%d)",
289  " and the length of bound constraints (%d) are different\n",
291  }
292  else if(bound.vlen==1)
293  {
294  SG_WARNING("All inducing_features (%dx%d) are constrainted by the single value (%f) in the %s bound\n",
296  }
297 }
298 
300 {
301  check_bound(bound,"lower");
302  m_lower_bound=bound;
303 }
305 {
306  check_bound(bound, "upper");
307  m_upper_bound=bound;
308 }
309 
311 {
312  REQUIRE(it>0, "Iteration (%d) must be positive\n",it);
314 }
316 {
317 
318  REQUIRE(tol>0, "Tolearance (%f) must be positive\n",tol);
319  m_ind_tolerance=tol;
320 }
322 {
323  m_opt_inducing_features=is_optmization;
325  {
327  REQUIRE(m_fully_sparse,"Please use a kernel which has the functionality about optimizing inducing features\n");
328  }
329  if(minimizer)
330  {
331  if (minimizer!=m_inducing_minimizer)
332  {
333  SG_REF(minimizer);
335  m_inducing_minimizer=minimizer;
336  }
337  }
338  else
339  {
340 
342 #ifdef USE_GPL_SHOGUN
343 #ifdef HAVE_NLOPT
344  m_inducing_minimizer=new CNLOPTMinimizer();
346 #else
348  SG_WARNING("We require NLOPT library for using default minimizer.\nYou can use other minimizer. (eg, LBFGSMinimier)\n");
349 #endif //HAVE_NLOPT
350 #else
352  SG_WARNING("We require NLOPT (GPL License) library for using default minimizer.\nYou can use other minimizer. (eg, LBFGSMinimier)");
353 #endif //USE_GPL_SHOGUN
354  }
355 }
356 
358 {
360  return;
361 
362  REQUIRE(m_inducing_minimizer, "Please call enable_optimizing_inducing_features() first\n");
364  cost_fun->set_target(this);
365  bool cleanup=false;
366  if(this->ref_count()>1)
367  cleanup=true;
368 
369 #ifdef USE_GPL_SHOGUN
370 #ifdef HAVE_NLOPT
371  CNLOPTMinimizer* opt=dynamic_cast<CNLOPTMinimizer*>(m_inducing_minimizer);
372  if (opt)
373  opt->set_nlopt_parameters(LD_LBFGS, m_max_ind_iterations, m_ind_tolerance, m_ind_tolerance);
374 #endif //HAVE_NLOPT
375 #endif //USE_GPL_SHOGUN
376 
380  cost_fun->unset_target(cleanup);
381  SG_UNREF(cost_fun);
382 }
383 
384 }
virtual const char * get_name() const =0
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Kernel.cpp:97
float64_t m_log_scale
Definition: Inference.h:485
virtual SGVector< float64_t > get_derivative_wrt_inference_method(const TParameter *param)
int32_t index_t
Definition: common.h:72
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual SGVector< float64_t > get_derivative_wrt_inducing_features(const TParameter *param)=0
CKernel * m_kernel
Definition: Inference.h:464
virtual void set_kernel(CKernel *kern)
virtual void set_max_iterations_for_inducing_features(int32_t it)
Definition: SGMatrix.h:25
parameter struct
#define REQUIRE(x,...)
Definition: SGIO.h:181
FirstOrderMinimizer * m_inducing_minimizer
virtual void enable_optimizing_inducing_features(bool is_optmization, FirstOrderMinimizer *minimizer=NULL)
An abstract class of the mean function.
Definition: MeanFunction.h:49
The first order cost function base class with bound constraints.
virtual void set_tolearance_for_inducing_features(float64_t tol)
#define SG_REF(x)
Definition: SGObject.h:52
CFeatures * m_features
Definition: Inference.h:473
SGMatrix< T > clone() const
Definition: SGMatrix.cpp:330
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:124
Class Lock used for synchronization in concurrent programs.
Definition: Lock.h:19
The sparse inference base class for classification and regression for 1-D labels (1D regression and b...
virtual const char * get_name() const
virtual SGVector< float64_t > get_derivative_wrt_kernel(const TParameter *param)
virtual void unset_cost_function(bool is_unref=true)
double float64_t
Definition: common.h:60
virtual void set_upper_bound_of_inducing_features(SGVector< float64_t > bound)
virtual CFeatures * get_inducing_features()
index_t num_rows
Definition: SGMatrix.h:495
virtual void set_kernel(CKernel *kern)
Definition: Inference.h:289
SGMatrix< float64_t > m_kuu
SGVector< T > clone() const
Definition: SGVector.cpp:262
index_t num_cols
Definition: SGMatrix.h:497
virtual float64_t minimize()=0
virtual void check_bound(SGVector< float64_t > bound, const char *name)
SG_FORCED_INLINE void lock()
Definition: Lock.h:23
SGVector< float64_t > m_ktrtr_diag
#define SG_UNREF(x)
Definition: SGObject.h:53
virtual void set_lower_bound_of_inducing_features(SGVector< float64_t > bound)
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
SGMatrix< float64_t > m_inducing_features
The class Features is the base class of all feature objects.
Definition: Features.h:69
virtual float64_t get_derivative_related_cov(SGVector< float64_t > ddiagKi, SGMatrix< float64_t > dKuui, SGMatrix< float64_t > dKui)=0
static float64_t exp(float64_t x)
Definition: Math.h:551
virtual SGMatrix< float64_t > get_parameter_gradient(const TParameter *param, index_t index=-1)
The Kernel base class.
SGMatrix< float64_t > m_ktru
SG_FORCED_INLINE void unlock()
Definition: Lock.h:34
int32_t ref_count()
Definition: SGObject.cpp:193
virtual SGVector< float64_t > get_derivative_wrt_inducing_noise(const TParameter *param)=0
The Fully Independent Conditional Training inference base class.
#define SG_WARNING(...)
Definition: SGIO.h:127
#define SG_ADD(...)
Definition: SGObject.h:93
virtual SGVector< float64_t > get_parameter_gradient_diagonal(const TParameter *param, index_t index=-1)
The Likelihood model base class.
virtual void set_cost_function(FirstOrderCostFunction *fun)
index_t vlen
Definition: SGVector.h:571
The first order minimizer base class.

SHOGUN Machine Learning Toolbox - Documentation