SHOGUN 6.1.3
KernelMachine.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 1999-2009 Soeren Sonnenburg
 * Written (W) 2011-2012 Heiko Strathmann
 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <shogun/base/progress.h>
#include <shogun/io/SGIO.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/labels/RegressionLabels.h>

#include <shogun/kernel/Kernel.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/machine/KernelMachine.h>
#include <shogun/labels/Labels.h>

#ifdef HAVE_OPENMP
#include <omp.h>
#endif

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct S_THREAD_PARAM_KERNEL_MACHINE
{
	CKernelMachine* kernel_machine;
	float64_t* result;
	int32_t start;
	int32_t end;

	/* if non-null, start and end correspond to indices in this vector */
	index_t* indices;
	index_t indices_len;
	bool verbose;
};
#endif // DOXYGEN_SHOULD_SKIP_THIS

CKernelMachine::CKernelMachine() : CMachine()
{
	init();
}

CKernelMachine::CKernelMachine(CKernel* k, SGVector<float64_t> alphas,
		SGVector<int32_t> svs, float64_t b) : CMachine()
{
	init();

	int32_t num_sv=svs.vlen;
	ASSERT(num_sv == alphas.vlen)
	create_new_model(num_sv);
	set_alphas(alphas);
	set_support_vectors(svs);
	set_kernel(k);
	set_bias(b);
}
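
/* Usage sketch (added for illustration; not part of the original file): the
 * same model can be assembled through the public setters this constructor
 * wraps. "machine" stands for an instance of any concrete CKernelMachine
 * subclass and "k" for an initialized CKernel -- both hypothetical names.
 *
 *   machine->create_new_model(2);      // allocate m_alpha and m_svs, reset bias
 *   machine->set_alpha(0, 0.5);        // coefficient of first support vector
 *   machine->set_alpha(1, -0.3);
 *   machine->set_support_vector(0, 0); // indices into the kernel's lhs features
 *   machine->set_support_vector(1, 7);
 *   machine->set_kernel(k);
 *   machine->set_bias(0.1);
 */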

CKernelMachine::CKernelMachine(CKernelMachine* machine) : CMachine()
{
	init();

	SGVector<float64_t> alphas = machine->get_alphas().clone();
	SGVector<int32_t> svs = machine->get_support_vectors().clone();
	float64_t bias = machine->get_bias();
	CKernel* ker = machine->get_kernel();

	int32_t num_sv = svs.vlen;
	create_new_model(num_sv);
	set_alphas(alphas);
	set_support_vectors(svs);
	set_bias(bias);
	set_kernel(ker);
}

CKernelMachine::~CKernelMachine()
{
	SG_UNREF(kernel);
	SG_UNREF(m_custom_kernel);
	SG_UNREF(m_kernel_backup);
}

void CKernelMachine::set_kernel(CKernel* k)
{
	SG_REF(k);
	SG_UNREF(kernel);
	kernel=k;
}

CKernel* CKernelMachine::get_kernel()
{
	SG_REF(kernel);
	return kernel;
}

void CKernelMachine::set_batch_computation_enabled(bool enable)
{
	use_batch_computation=enable;
}

bool CKernelMachine::get_batch_computation_enabled()
{
	return use_batch_computation;
}

void CKernelMachine::set_linadd_enabled(bool enable)
{
	use_linadd=enable;
}

bool CKernelMachine::get_linadd_enabled()
{
	return use_linadd;
}

void CKernelMachine::set_bias_enabled(bool enable_bias)
{
	use_bias=enable_bias;
}

bool CKernelMachine::get_bias_enabled()
{
	return use_bias;
}

float64_t CKernelMachine::get_bias()
{
	return m_bias;
}

void CKernelMachine::set_bias(float64_t bias)
{
	m_bias=bias;
}

int32_t CKernelMachine::get_support_vector(int32_t idx)
{
	ASSERT(m_svs.vector && idx<m_svs.vlen)
	return m_svs.vector[idx];
}

float64_t CKernelMachine::get_alpha(int32_t idx)
{
	if (!m_alpha.vector)
		SG_ERROR("No alphas set\n")
	if (idx>=m_alpha.vlen)
		SG_ERROR("Alphas index (%d) out of range (%d)\n", idx, m_alpha.vlen)
	return m_alpha.vector[idx];
}

bool CKernelMachine::set_support_vector(int32_t idx, int32_t val)
{
	if (m_svs.vector && idx<m_svs.vlen)
		m_svs.vector[idx]=val;
	else
		return false;

	return true;
}

bool CKernelMachine::set_alpha(int32_t idx, float64_t val)
{
	if (m_alpha.vector && idx<m_alpha.vlen)
		m_alpha.vector[idx]=val;
	else
		return false;

	return true;
}

int32_t CKernelMachine::get_num_support_vectors()
{
	return m_svs.vlen;
}

void CKernelMachine::set_alphas(SGVector<float64_t> alphas)
{
	m_alpha = alphas;
}

void CKernelMachine::set_support_vectors(SGVector<int32_t> svs)
{
	m_svs = svs;
}

SGVector<int32_t> CKernelMachine::get_support_vectors()
{
	return m_svs;
}

SGVector<float64_t> CKernelMachine::get_alphas()
{
	return m_alpha;
}

bool CKernelMachine::create_new_model(int32_t num)
{
	m_alpha=SGVector<float64_t>();
	m_svs=SGVector<int32_t>();

	m_bias=0;

	if (num>0)
	{
		m_alpha= SGVector<float64_t>(num);
		m_svs= SGVector<int32_t>(num);
		return (m_alpha.vector!=NULL && m_svs.vector!=NULL);
	}
	else
		return true;
}

bool CKernelMachine::init_kernel_optimization()
{
	int32_t num_sv=get_num_support_vectors();

	if (kernel && kernel->has_property(KP_LINADD) && num_sv>0)
	{
		int32_t* sv_idx = SG_MALLOC(int32_t, num_sv);
		float64_t* sv_weight = SG_MALLOC(float64_t, num_sv);

		for (int32_t i=0; i<num_sv; i++)
		{
			sv_idx[i] = get_support_vector(i);
			sv_weight[i] = get_alpha(i);
		}

		bool ret = kernel->init_optimization(num_sv, sv_idx, sv_weight);

		SG_FREE(sv_idx);
		SG_FREE(sv_weight);

		if (!ret)
			SG_ERROR("initialization of kernel optimization failed\n")

		return ret;
	}
	else
		SG_ERROR("kernel optimization requires a kernel with the LINADD "
				"property and at least one support vector\n")

	return false;
}
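
/* Note on linadd (KP_LINADD): init_optimization() hands the support-vector
 * indices and weights to the kernel so the whole expansion can be folded into
 * the kernel's own representation. For a kernel with an explicit feature map,
 * for instance, the sum
 *
 *   f(x) = sum_i alpha_i * k(x_i, x) + b = <w, phi(x)> + b,
 *   with w = sum_i alpha_i * phi(x_i),
 *
 * collapses into one precomputed vector w, so each later compute_optimized()
 * call costs a single inner product instead of num_sv kernel evaluations.
 * (The explicit-feature-map example is added here for illustration.) */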

CRegressionLabels* CKernelMachine::apply_regression(CFeatures* data)
{
	SGVector<float64_t> outputs = apply_get_outputs(data);
	return new CRegressionLabels(outputs);
}

CBinaryLabels* CKernelMachine::apply_binary(CFeatures* data)
{
	SGVector<float64_t> outputs = apply_get_outputs(data);
	return new CBinaryLabels(outputs);
}
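
/* The outputs handed to CBinaryLabels above are the raw scores f(x);
 * CBinaryLabels derives the +1/-1 class labels by thresholding the scores at
 * zero (its default threshold) while keeping the scores as confidences. */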

SGVector<float64_t> CKernelMachine::apply_get_outputs(CFeatures* data)
{
	SG_DEBUG("entering %s::apply_get_outputs(%s at %p)\n",
			get_name(), data ? data->get_name() : "NULL", data);

	REQUIRE(kernel, "%s::apply_get_outputs(): No kernel assigned!\n",
			get_name())

	if (!kernel->get_num_vec_lhs())
	{
		SG_ERROR("%s: No vectors on left hand side (%s). This is probably due to"
				" an implementation error in %s, where it was forgotten to set "
				"the data (m_svs) indices\n", get_name(),
				data ? data->get_name() : "NULL", get_name());
	}

	if (data)
	{
		CFeatures* lhs=kernel->get_lhs();
		REQUIRE(lhs, "%s::apply_get_outputs(): No left hand side specified\n",
				get_name());
		kernel->init(lhs, data);
		SG_UNREF(lhs);
	}

	/* using the features to get num vectors is far safer than using the kernel
	 * since SHOGUN's kernel num_rhs/num_lhs is buggy (CombinedKernel for ex.)
	 * Might be worth investigating why
	 * kernel->get_num_rhs() != rhs->get_num_vectors()
	 * However, the below version works
	 * TODO Heiko Strathmann
	 */
	CFeatures* rhs=kernel->get_rhs();
	int32_t num_vectors=rhs ? rhs->get_num_vectors() : kernel->get_num_vec_rhs();
	SG_UNREF(rhs)

	SGVector<float64_t> output(num_vectors);

	if (kernel->get_num_vec_rhs()>0)
	{
		SG_DEBUG("computing output on %d test examples\n", num_vectors)

		if (io->get_show_progress())
			io->enable_progress();
		else
			io->disable_progress();

		if (kernel->has_property(KP_BATCHEVALUATION) &&
				get_batch_computation_enabled())
		{
			output.zero();
			SG_DEBUG("Batch evaluation enabled\n")
			if (get_num_support_vectors()>0)
			{
				int32_t* sv_idx=SG_MALLOC(int32_t, get_num_support_vectors());
				float64_t* sv_weight=SG_MALLOC(float64_t, get_num_support_vectors());
				int32_t* idx=SG_MALLOC(int32_t, num_vectors);

				/* compute output for all vectors v[0]...v[num_vectors-1] */
				for (int32_t i=0; i<num_vectors; i++)
					idx[i]=i;

				for (int32_t i=0; i<get_num_support_vectors(); i++)
				{
					sv_idx[i] = get_support_vector(i);
					sv_weight[i] = get_alpha(i);
				}

				kernel->compute_batch(num_vectors, idx,
						output.vector, get_num_support_vectors(), sv_idx, sv_weight);
				SG_FREE(sv_idx);
				SG_FREE(sv_weight);
				SG_FREE(idx);
			}

			for (int32_t i=0; i<num_vectors; i++)
				output[i] = get_bias() + output[i];
		}
		else
		{
			auto pb = progress(range(num_vectors));
			int32_t num_threads;
			int64_t step;
#pragma omp parallel shared(num_threads, step)
			{
#ifdef HAVE_OPENMP
#pragma omp single
				{
					num_threads = omp_get_num_threads();
					step = num_vectors / num_threads;
					num_threads--;
				}
				int32_t thread_num = omp_get_thread_num();
#else
				num_threads = 0;
				step = num_vectors;
				int32_t thread_num = 0;
#endif
				int32_t start = thread_num * step;
				int32_t end = (thread_num == num_threads)
					? num_vectors
					: (thread_num + 1) * step;

#ifdef WIN32
				for (int32_t vec = start; vec < end; vec++)
#else
				for (int32_t vec = start; vec < end && !cancel_computation();
					vec++)
#endif
				{
					pb.print_progress();

					ASSERT(kernel)
					if (kernel->has_property(KP_LINADD) &&
						(kernel->get_is_initialized()))
					{
						float64_t score = kernel->compute_optimized(vec);
						output[vec] = score + get_bias();
					}
					else
					{
						float64_t score = 0;
						for (int32_t i = 0; i < get_num_support_vectors(); i++)
							score +=
								kernel->kernel(get_support_vector(i), vec) *
								get_alpha(i);
						output[vec] = score + get_bias();
					}
				}
			}
			pb.complete();
		}

#ifndef WIN32
		if (cancel_computation())
			SG_INFO("prematurely stopped.\n")
#endif
	}

	SG_DEBUG("leaving %s::apply_get_outputs(%s at %p)\n",
			get_name(), data ? data->get_name() : "NULL", data);

	return output;
}
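
/* The OpenMP block above partitions the test vectors by hand: with T threads,
 * step = num_vectors / T, thread t scores the half-open range
 * [t*step, (t+1)*step), and the last thread also absorbs the remainder when
 * num_vectors is not divisible by T. Worked example (added for illustration):
 * num_vectors = 10, T = 3  =>  step = 3, ranges [0,3), [3,6), [6,10). */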

void CKernelMachine::store_model_features()
{
	if (!kernel)
		SG_ERROR("kernel is needed to store SV features.\n")

	CFeatures* lhs=kernel->get_lhs();
	CFeatures* rhs=kernel->get_rhs();

	if (!lhs)
		SG_ERROR("kernel lhs is needed to store SV features.\n")

	/* copy sv feature data */
	CFeatures* sv_features=lhs->copy_subset(m_svs);
	SG_UNREF(lhs);

	/* set new lhs to kernel */
	kernel->init(sv_features, rhs);

	/* unref rhs */
	SG_UNREF(rhs);

	/* was SG_REF'ed by copy_subset */
	SG_UNREF(sv_features);

	/* now sv indices are just the identity */
	m_svs.range_fill();
}
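
/* After store_model_features(), the kernel's lhs holds copies of only the
 * support vectors, so the stored indices must be remapped. Worked example
 * (added for illustration): with m_svs = [3, 17, 42] into the full training
 * set, copy_subset() builds a 3-vector feature object and range_fill() turns
 * m_svs into [0, 1, 2], the identity into the copied features. This makes
 * the model self-contained once the training data is gone. */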

bool CKernelMachine::train_locked(SGVector<index_t> indices)
{
	SG_DEBUG("entering %s::train_locked()\n", get_name())
	if (!is_data_locked())
		SG_ERROR("CKernelMachine::train_locked() call data_lock() before!\n")

	/* this is assumed here */
	ASSERT(m_custom_kernel==kernel)

	/* since it's not easily possible to control the row subsets of the custom
	 * kernel from outside, we enforce that there is only one row subset by
	 * removing all of them. Otherwise, they would add up in the stack until
	 * an error occurs */
	m_custom_kernel->remove_all_row_subsets();

	/* set custom kernel subset of data to train on */
	m_custom_kernel->add_row_subset(indices);
	m_custom_kernel->add_col_subset(indices);

	/* set corresponding labels subset */
	m_labels->add_subset(indices);

	/* don't do train because model should not be stored (no actual features)
	 * and train does data_unlock */
	bool result=train_machine();

	/* remove last col subset of custom kernel */
	m_custom_kernel->remove_col_subset();

	/* remove label subset after training */
	m_labels->remove_subset();

	SG_DEBUG("leaving %s::train_locked()\n", get_name())
	return result;
}

CBinaryLabels* CKernelMachine::apply_locked_binary(SGVector<index_t> indices)
{
	SGVector<float64_t> outputs = apply_locked_get_output(indices);
	return new CBinaryLabels(outputs);
}

CRegressionLabels* CKernelMachine::apply_locked_regression(
		SGVector<index_t> indices)
{
	SGVector<float64_t> outputs = apply_locked_get_output(indices);
	return new CRegressionLabels(outputs);
}

SGVector<float64_t> CKernelMachine::apply_locked_get_output(
		SGVector<index_t> indices)
{
	if (!is_data_locked())
		SG_ERROR("CKernelMachine::apply_locked() call data_lock() before!\n")

	/* we are working on a custom kernel here */
	ASSERT(m_custom_kernel==kernel)

	int32_t num_inds=indices.vlen;
	SGVector<float64_t> output(num_inds);

	if (io->get_show_progress())
		io->enable_progress();
	else
		io->disable_progress();

	/* custom kernel never has batch evaluation property so don't do this here */
	auto pb = progress(range(0, num_inds));
	int32_t num_threads;
	int64_t step;
#pragma omp parallel shared(num_threads, step)
	{
#ifdef HAVE_OPENMP
#pragma omp single
		{
			num_threads = omp_get_num_threads();
			step = num_inds / num_threads;
			num_threads--;
		}
		int32_t thread_num = omp_get_thread_num();
#else
		num_threads = 0;
		step = num_inds;
		int32_t thread_num = 0;
#endif
		int32_t start = thread_num * step;
		int32_t end =
			(thread_num == num_threads) ? num_inds : (thread_num + 1) * step;
#ifdef WIN32
		for (int32_t vec = start; vec < end; vec++)
#else
		for (int32_t vec = start; vec < end && !cancel_computation(); vec++)
#endif
		{
			pb.print_progress();
			index_t index = indices[vec];
			ASSERT(kernel)
			if (kernel->has_property(KP_LINADD) &&
				(kernel->get_is_initialized()))
			{
				float64_t score = kernel->compute_optimized(index);
				output[vec] = score + get_bias();
			}
			else
			{
				float64_t score = 0;
				for (int32_t i = 0; i < get_num_support_vectors(); i++)
					score += kernel->kernel(get_support_vector(i), index) *
						get_alpha(i);

				output[vec] = score + get_bias();
			}
		}
	}

#ifndef WIN32
	if (cancel_computation())
		SG_INFO("prematurely stopped.\n")
	else
#endif
		pb.complete();

	return output;
}

float64_t CKernelMachine::apply_one(int32_t num)
{
	ASSERT(kernel)

	if (kernel->has_property(KP_LINADD) && (kernel->get_is_initialized()))
	{
		float64_t score = kernel->compute_optimized(num);
		return score + get_bias();
	}
	else
	{
		float64_t score = 0;
		for (int32_t i = 0; i < get_num_support_vectors(); i++)
			score += kernel->kernel(get_support_vector(i), num) * get_alpha(i);

		return score + get_bias();
	}
}
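
/* apply_one() is the single-point version of the kernel expansion used
 * throughout this file:
 *
 *   f(x_num) = sum_{i=0}^{num_sv-1} alpha_i * k(sv_i, x_num) + b
 *
 * taking the linadd shortcut (compute_optimized) when the kernel has one
 * prepared, and the explicit sum over support vectors otherwise. */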

void CKernelMachine::data_lock(CLabels* labs, CFeatures* features)
{
	if (!kernel)
		SG_ERROR("The kernel is not initialized\n")
	if (kernel->get_kernel_type()==K_COMBINED)
		SG_ERROR("Locking is not supported (yet) with combined kernel. "
				"Please disable it in cross validation\n")

	/* init kernel with data */
	kernel->init(features, features);

	/* backup reference to old kernel */
	SG_UNREF(m_kernel_backup);
	m_kernel_backup=kernel;
	SG_REF(m_kernel_backup);

	/* unref possible old custom kernel */
	SG_UNREF(m_custom_kernel);

	/* create custom kernel matrix from current kernel */
	m_custom_kernel=new CCustomKernel(kernel);
	SG_REF(m_custom_kernel);

	/* replace kernel by custom kernel */
	SG_UNREF(kernel);
	kernel=m_custom_kernel;
	SG_REF(kernel);

	/* don't forget to call superclass method */
	CMachine::data_lock(labs, features);
}
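
/* Typical locking workflow (sketch added for illustration; "machine",
 * "labels", "features" and the index vectors are hypothetical names):
 *
 *   machine->data_lock(labels, features);   // precompute kernel matrix once
 *   // for each fold:
 *   machine->train_locked(train_idx);       // train on a row/col subset
 *   CBinaryLabels* pred = machine->apply_locked_binary(test_idx);
 *   // ... evaluate, SG_UNREF(pred) ...
 *   machine->data_unlock();                 // restore the original kernel
 *
 * This avoids recomputing kernel entries between folds, e.g. during
 * cross-validation. */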

void CKernelMachine::data_unlock()
{
	SG_UNREF(m_custom_kernel);
	m_custom_kernel=NULL;

	/* restore original kernel, possibly delete created one */
	if (m_kernel_backup)
	{
		/* check if kernel was created in train_locked */
		if (kernel!=m_kernel_backup)
			SG_UNREF(kernel);

		kernel=m_kernel_backup;
		m_kernel_backup=NULL;
	}

	/* don't forget to call superclass method */
	CMachine::data_unlock();
}

void CKernelMachine::init()
{
	m_bias=0.0;
	kernel=NULL;
	m_custom_kernel=NULL;
	m_kernel_backup=NULL;
	use_batch_computation=true;
	use_linadd=true;
	use_bias=true;

	SG_ADD((CSGObject**) &kernel, "kernel", "", MS_AVAILABLE);
	SG_ADD((CSGObject**) &m_custom_kernel, "custom_kernel", "Custom kernel for"
			" data lock", MS_NOT_AVAILABLE);
	SG_ADD((CSGObject**) &m_kernel_backup, "kernel_backup",
			"Kernel backup for data lock", MS_NOT_AVAILABLE);
	SG_ADD(&use_batch_computation, "use_batch_computation",
			"Batch computation is enabled.", MS_NOT_AVAILABLE);
	SG_ADD(&use_linadd, "use_linadd", "Linadd is enabled.", MS_NOT_AVAILABLE);
	SG_ADD(&use_bias, "use_bias", "Bias shall be used.", MS_NOT_AVAILABLE);
	SG_ADD(&m_bias, "m_bias", "Bias term.", MS_NOT_AVAILABLE);
	SG_ADD(&m_alpha, "m_alpha", "Array of coefficients alpha.",
			MS_NOT_AVAILABLE);
	SG_ADD(&m_svs, "m_svs", "Indices of ``support vectors''.",
			MS_NOT_AVAILABLE);
}

bool CKernelMachine::supports_locking() const
{
	return true;
}