SHOGUN  6.1.3
LibLinearMTL.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Christian Widmer
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2012 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <vector>

#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#include <shogun/base/Parameter.h>
#include <shogun/base/progress.h>
#include <shogun/classifier/svm/LibLinearMTL.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/mathematics/Math.h>
#include <shogun/optimization/liblinear/tron.h>

using namespace shogun;


CLibLinearMTL::CLibLinearMTL()
: CLinearMachine()
{
	init();
}

CLibLinearMTL::CLibLinearMTL(
	float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
	init();
	C1=C;
	C2=C;
	use_bias=true;

	set_features(traindat);
	set_labels(trainlab);
}
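
/* Usage sketch (illustrative only, not part of the build). The setters are
 * declared in LibLinearMTL.h; feature/label/matrix construction is elided
 * and all variable names here are hypothetical:
 *
 *   CDotFeatures* feats = ...;   // training features
 *   CBinaryLabels* labs = ...;   // +1/-1 labels
 *
 *   CLibLinearMTL* mtl = new CLibLinearMTL(1.0, feats, labs);
 *   mtl->set_num_tasks(num_tasks);
 *   mtl->set_task_indicator_lhs(task_ids);  // task id of each example
 *   mtl->set_task_similarity_matrix(M);     // sparse task-similarity matrix
 *   mtl->set_graph_laplacian(L);            // used by the primal objective
 *   mtl->train();
 */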


void CLibLinearMTL::init()
{
	use_bias=false;
	C1=1;
	C2=1;
	max_iterations=10000;
	epsilon=1e-5;

	SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
	SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
	SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
			MS_NOT_AVAILABLE);
	SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
	SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
			MS_NOT_AVAILABLE);
}

CLibLinearMTL::~CLibLinearMTL()
{
}

bool CLibLinearMTL::train_machine(CFeatures* data)
{
	ASSERT(m_labels)

	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n")

		set_features((CDotFeatures*) data);
	}
	ASSERT(features)
	m_labels->ensure_valid();

	int32_t num_train_labels=m_labels->get_num_labels();
	int32_t num_feat=features->get_dim_feature_space();
	int32_t num_vec=features->get_num_vectors();

	if (num_vec!=num_train_labels)
	{
		SG_ERROR("number of vectors %d does not match "
				"number of training labels %d\n",
				num_vec, num_train_labels);
	}

	float64_t* training_w = NULL;
	if (use_bias)
		training_w=SG_MALLOC(float64_t, num_feat+1);
	else
		training_w=SG_MALLOC(float64_t, num_feat+0);

	liblinear_problem prob;
	if (use_bias)
	{
		prob.n=num_feat+1;
		memset(training_w, 0, sizeof(float64_t)*(num_feat+1));
	}
	else
	{
		prob.n=num_feat;
		memset(training_w, 0, sizeof(float64_t)*(num_feat+0));
	}
	prob.l=num_vec;
	prob.x=features;
	prob.y=SG_MALLOC(float64_t, prob.l);
	prob.use_bias=use_bias;

	for (int32_t i=0; i<prob.l; i++)
		prob.y[i]=((CBinaryLabels*)m_labels)->get_label(i);

	int pos = 0;
	int neg = 0;
	for(int i=0;i<prob.l;i++)
	{
		if(prob.y[i]==+1)
			pos++;
	}
	neg = prob.l - pos;

	SG_INFO("%d training points %d dims\n", prob.l, prob.n)
	SG_INFO("%d positives, %d negatives\n", pos, neg)

	double Cp=C1;
	double Cn=C2;
	solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn);

	if (use_bias)
		set_bias(training_w[num_feat]);
	else
		set_bias(0);

	SG_FREE(prob.y);

	SGVector<float64_t> w(num_feat);
	for (int32_t i=0; i<num_feat; i++)
		w[i] = training_w[i];
	set_w(w);

	// training_w is scratch space only (the solver stores V internally);
	// free it to avoid a leak
	SG_FREE(training_w);

	return true;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//              upper_bound_i = Cp if y_i = 1
//              upper_bound_i = Cn if y_i = -1
//              D_ii = 0
// In L2-SVM case:
//              upper_bound_i = INF
//              D_ii = 1/(2*Cp) if y_i = 1
//              D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

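// One dual coordinate step, written out (a sketch of the generic update;
// in the MTL solver below the inner product additionally runs over the
// task-similarity matrix):
//
//   G = y_i * (w . x_i) - 1 + D_ii * alpha_i       // dual gradient
//   alpha_i = min(max(alpha_i - G / Q_ii, 0), C)   // project onto [0, C]
//   w += (alpha_i_new - alpha_i_old) * y_i * x_i   // keep w in sync
//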
#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
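// Example: GETI maps y[i] in {-1,+1} to an index into the 3-element
// diag/upper_bound arrays: y[i]=-1 -> entry 0 (Cn), y[i]=+1 -> entry 2 (Cp);
// entry 1 is unused.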


void CLibLinearMTL::solve_l2r_l1l2_svc(const liblinear_problem *prob, double eps, double Cp, double Cn)
{
	int l = prob->l;
	int w_size = prob->n;
	int i, s, iter = 0;
	double C, d, G;
	double *QD = SG_MALLOC(double, l);
	int *index = SG_MALLOC(int, l);
	//double *alpha = SG_MALLOC(double, l);

	int32_t *y = SG_MALLOC(int32_t, l);
	int active_size = l;
	// PG: projected gradient, for shrinking and stopping
	double PG;
	double PGmax_old = CMath::INFTY;
	double PGmin_old = -CMath::INFTY;
	double PGmax_new, PGmin_new;

	// matrix V holds one weight vector per task (column t belongs to task t)
	V = SGMatrix<float64_t>(w_size,num_tasks);

	// save alphas for the dual objective
	alphas = SGVector<float64_t>(l);

	// default solver_type: L2R_L2LOSS_SVC_DUAL
	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
	double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
	// the L1-loss case is hard-coded here: zero diagonal term,
	// dual variables box-constrained by Cn/Cp
	if(true)
	{
		diag[0] = 0;
		diag[2] = 0;
		upper_bound[0] = Cn;
		upper_bound[2] = Cp;
	}

	int n = prob->n;

	if (prob->use_bias)
		n--;

	// set V to zero
	for(int32_t k=0; k<w_size*num_tasks; k++)
	{
		V.matrix[k] = 0;
	}

	// init alphas
	for(i=0; i<l; i++)
	{
		alphas[i] = 0;
	}

	for(i=0; i<l; i++)
	{
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
		QD[i] = diag[GETI(i)];
		QD[i] += prob->x->dot(i, prob->x,i);
		index[i] = i;
	}

	auto pb = progress(range(10));
	CTime start_time;
	while (iter < max_iterations && !cancel_computation())
	{
		if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
			break;

		PGmax_new = -CMath::INFTY;
		PGmin_new = CMath::INFTY;

		// randomly permute the active coordinates (Fisher-Yates shuffle)
		for (i=0; i<active_size; i++)
		{
			int j = CMath::random(i, active_size-1);
			CMath::swap(index[i], index[j]);
		}

		for (s=0;s<active_size;s++)
		{
			i = index[s];
			int32_t yi = y[i];
			int32_t ti = task_indicator_lhs[i];
			C = upper_bound[GETI(i)];

			// we compute the inner sum by looping over tasks
			// this update is the main result of MTL_DCD
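			// with M = task_similarity_matrix and V_t the t-th column of V,
			// the quantity accumulated below is
			//   inner_sum = y_i * sum_t M[t_i, t] * (V_t . x_i)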
			typedef std::map<index_t, float64_t>::const_iterator map_iter;

			float64_t inner_sum = 0;
			for (map_iter it=task_similarity_matrix.data[ti].begin(); it!=task_similarity_matrix.data[ti].end(); it++)
			{
				// get data from sparse matrix
				int32_t e_i = it->first;
				float64_t sim = it->second;

				// fetch vector
				float64_t* tmp_w = V.get_column_vector(e_i);
				inner_sum += sim * yi * prob->x->dense_dot(i, tmp_w, n);

				//possibly deal with bias
				//if (prob->use_bias)
				//	G+=w[n];
			}

			// compute gradient
			G = inner_sum-1.0;

			// check if point can be removed from active set
			PG = 0;
			if (alphas[i] == 0)
			{
				if (G > PGmax_old)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					PG = G;
			}
			else if (alphas[i] == C)
			{
				if (G < PGmin_old)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					PG = G;
			}
			else
				PG = G;

			PGmax_new = CMath::max(PGmax_new, PG);
			PGmin_new = CMath::min(PGmin_new, PG);

			if(fabs(PG) > 1.0e-12)
			{
				// save previous alpha
				double alpha_old = alphas[i];

				// project onto feasible set
				alphas[i] = CMath::min(CMath::max(alphas[i] - G/QD[i], 0.0), C);
				d = (alphas[i] - alpha_old)*yi;

				// update corresponding weight vector
				float64_t* tmp_w = V.get_column_vector(ti);
				prob->x->add_to_dense_vec(d, i, tmp_w, n);

				//if (prob->use_bias)
				//	w[n]+=d;
			}
		}

		iter++;
		float64_t gap=PGmax_new - PGmin_new;
		pb.print_absolute(
			gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps));

		if(gap <= eps)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				PGmax_old = CMath::INFTY;
				PGmin_old = -CMath::INFTY;
				continue;
			}
		}
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
		if (PGmax_old <= 0)
			PGmax_old = CMath::INFTY;
		if (PGmin_old >= 0)
			PGmin_old = -CMath::INFTY;
	}

	pb.complete_absolute();
	SG_INFO("optimization finished, #iter = %d\n",iter)
	if (iter >= max_iterations)
	{
		SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
				"(also see liblinear FAQ)\n\n");
	}

	// buffers were allocated with SG_MALLOC, so release them with SG_FREE
	SG_FREE(QD);
	//SG_FREE(alpha);
	SG_FREE(y);
	SG_FREE(index);
}


float64_t CLibLinearMTL::compute_primal_obj()
{
	/* python prototype
	num_param = param.shape[0]
	num_dim = len(all_xt[0])
	num_tasks = int(num_param / num_dim)
	num_examples = len(all_xt)

	# vector to matrix
	W = param.reshape(num_tasks, num_dim)

	obj = 0

	reg_obj = 0
	loss_obj = 0

	assert len(all_xt) == len(all_lt) == len(task_indicator)

	# L2 regularizer
	for t in xrange(num_tasks):
		reg_obj += 0.5 * np.dot(W[t,:], W[t,:])

	# MTL regularizer
	for s in xrange(num_tasks):
		for t in xrange(num_tasks):
			reg_obj += 0.5 * L[s,t] * np.dot(W[s,:], W[t,:])

	# loss
	for i in xrange(num_examples):
		ti = task_indicator[i]
		t = all_lt[i] * np.dot(W[ti,:], all_xt[i])
		# hinge
		loss_obj += max(0, 1 - t)

	# combine to final objective
	obj = reg_obj + C * loss_obj

	return obj
	*/
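
	// In one formula, with W_t = column t of W, L = graph_laplacian and
	// t_i = task of example i, the code below evaluates
	//   obj = 0.5*sum_t (W_t . W_t) + 0.5*sum_{s,t} L[s,t]*(W_s . W_t)
	//         + C1 * sum_i max(0, 1 - y_i*(W_{t_i} . x_i))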

	SG_INFO("starting to compute PRIMAL OBJ\n")
	// calculate objective value
	SGMatrix<float64_t> W = get_W();

	float64_t obj = 0;
	int32_t num_vec = features->get_num_vectors();
	int32_t w_size = features->get_dim_feature_space();

	// L2 regularizer
	for (int32_t t=0; t<num_tasks; t++)
	{
		float64_t* w_t = W.get_column_vector(t);

		for(int32_t i=0; i<w_size; i++)
		{
			obj += 0.5 * w_t[i]*w_t[i];
		}
	}

	// MTL regularizer
	for (int32_t s=0; s<num_tasks; s++)
	{
		float64_t* w_s = W.get_column_vector(s);
		for (int32_t t=0; t<num_tasks; t++)
		{
			float64_t* w_t = W.get_column_vector(t);
			float64_t l = graph_laplacian.matrix[s*num_tasks+t];

			for(int32_t i=0; i<w_size; i++)
			{
				obj += 0.5 * l * w_s[i]*w_t[i];
			}
		}
	}

	// loss
	for(int32_t i=0; i<num_vec; i++)
	{
		int32_t ti = task_indicator_lhs[i];
		float64_t* w_t = W.get_column_vector(ti);
		float64_t margin = ((CBinaryLabels*)m_labels)->get_label(i) * features->dense_dot(i, w_t, w_size);

		// hinge loss
		obj += C1 * CMath::max(0.0, 1 - margin);
	}

	SG_INFO("DONE computing PRIMAL OBJ, obj=%f\n",obj)

	return obj;
}

float64_t CLibLinearMTL::compute_dual_obj()
{
	/* python prototype
	num_xt = len(xt)

	# compute quadratic term
	for i in xrange(num_xt):
		for j in xrange(num_xt):
			s = task_indicator[i]
			t = task_indicator[j]
			obj -= 0.5 * M[s,t] * alphas[i] * alphas[j] * lt[i] * lt[j] * np.dot(xt[i], xt[j])

	return obj
	*/
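
	// In one formula, with M = task_similarity_matrix and V_t = column t of V
	// (so V_t = sum_{i: t_i=t} alpha_i*y_i*x_i), the code below evaluates
	//   obj = sum_i alpha_i - 0.5 * sum_{s,t} M[s,t] * (V_s . V_t)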

	SG_INFO("starting to compute DUAL OBJ\n")

	int32_t num_vec=features->get_num_vectors();

	float64_t obj = 0;

	// compute linear term
	for(int32_t i=0; i<num_vec; i++)
	{
		obj += alphas[i];
	}

	// compute quadratic term
	int32_t v_size = features->get_dim_feature_space();

	// efficient computation
	for (int32_t s=0; s<num_tasks; s++)
	{
		float64_t* v_s = V.get_column_vector(s);
		for (int32_t t=0; t<num_tasks; t++)
		{
			float64_t* v_t = V.get_column_vector(t);
			const float64_t ts = task_similarity_matrix(s, t);

			for(int32_t i=0; i<v_size; i++)
			{
				obj -= 0.5 * ts * v_s[i]*v_t[i];
			}
		}
	}

	/*
	// naive implementation
	float64_t tmp_val2 = 0;

	for(int32_t i=0; i<num_vec; i++)
	{
		int32_t ti_i = task_indicator_lhs[i];
		for(int32_t j=0; j<num_vec; j++)
		{
			// look up task similarity
			int32_t ti_j = task_indicator_lhs[j];

			const float64_t ts = task_similarity_matrix(ti_i, ti_j);

			// compute objective
			tmp_val2 -= 0.5 * alphas[i] * alphas[j] * ts * ((CBinaryLabels*)m_labels)->get_label(i) *
					((CBinaryLabels*)m_labels)->get_label(j) * features->dot(i, features,j);
		}
	}
	*/

	return obj;
}


float64_t CLibLinearMTL::compute_duality_gap()
{
	return 0.0;
}
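
/* A possible implementation (a sketch only; the method above is a stub in
 * this version): the duality gap is the primal objective minus the dual
 * objective, non-negative and approaching zero at the optimum.
 *
 *   float64_t gap = compute_primal_obj() - compute_dual_obj();
 *   return gap;
 */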


#endif //HAVE_LAPACK