SHOGUN  6.1.3
shogun_liblinear.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2007-2009 The LIBLINEAR Project.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  *
16  * 3. Neither name of copyright holders nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
25  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <shogun/lib/config.h>
34 #ifndef DOXYGEN_SHOULD_SKIP_THIS
35 #include <math.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <stdarg.h>
40 
45 #include <shogun/lib/Time.h>
46 #include <shogun/lib/Signal.h>
47 
48 using namespace shogun;
49 
50 l2r_lr_fun::l2r_lr_fun(const liblinear_problem *p, float64_t* Cs)
51 {
52  int l=p->l;
53 
54  this->m_prob = p;
55 
56  z = SG_MALLOC(double, l);
57  D = SG_MALLOC(double, l);
58  C = Cs;
59 }
60 
61 l2r_lr_fun::~l2r_lr_fun()
62 {
63  SG_FREE(z);
64  SG_FREE(D);
65 }
66 
67 
/** Objective value of L2-regularized logistic regression:
 *    f(w) = 0.5*||w||^2 + sum_i C[i]*log(1 + exp(-y_i * (X w)_i))
 * Side effect: caches z = X*w, which a subsequent grad() call reuses.
 */
double l2r_lr_fun::fun(double *w)
{
	int i;
	double f=0;
	float64_t *y=m_prob->y;
	int l=m_prob->l;
	int32_t n=m_prob->n;

	// z := X*w (kept for grad())
	Xv(w, z);
	for(i=0;i<l;i++)
	{
		double yz = y[i]*z[i];
		// two algebraically equivalent forms, chosen to avoid overflow in exp()
		if (yz >= 0)
			f += C[i]*log(1 + exp(-yz));
		else
			f += C[i]*(-yz+log(1 + exp(yz)));
	}
	// L2 regularizer 0.5*w'w; w_wrap borrows w's memory (no copy, no free)
	SGVector<float64_t> w_wrap(w, n, false);
	f += 0.5 *linalg::dot(w_wrap, w_wrap);

	return(f);
}
90 
/** Gradient of the logistic objective: g = w + X^T (C .* (sigma - 1) .* y),
 * where sigma = 1/(1+exp(-y.*z)).
 * NOTE(review): relies on z holding X*w from a preceding fun(w) call (the
 * usual fun-then-grad call order of the TRON optimizer — confirm with caller).
 * Overwrites z in place and fills D with sigma*(1-sigma) for later Hv() calls.
 */
void l2r_lr_fun::grad(double *w, double *g)
{
	int i;
	float64_t *y=m_prob->y;
	int l=m_prob->l;
	int w_size=get_nr_variable();

	for(i=0;i<l;i++)
	{
		// z[i] becomes sigma(y_i * z_i)
		z[i] = 1/(1 + exp(-y[i]*z[i]));
		// curvature weight cached for the Hessian-vector product
		D[i] = z[i]*(1-z[i]);
		// loss derivative w.r.t. the i-th score, scaled by C[i]
		z[i] = C[i]*(z[i]-1)*y[i];
	}
	XTv(z, g);

	// add the gradient of the 0.5*||w||^2 regularizer
	for(i=0;i<w_size;i++)
		g[i] = w[i] + g[i];
}
109 
110 int l2r_lr_fun::get_nr_variable()
111 {
112  return m_prob->n;
113 }
114 
115 void l2r_lr_fun::Hv(double *s, double *Hs)
116 {
117  int i;
118  int l=m_prob->l;
119  int w_size=get_nr_variable();
120  double *wa = SG_MALLOC(double, l);
121 
122  Xv(s, wa);
123  for(i=0;i<l;i++)
124  wa[i] = C[i]*D[i]*wa[i];
125 
126  XTv(wa, Hs);
127  for(i=0;i<w_size;i++)
128  Hs[i] = s[i] + Hs[i];
129  SG_FREE(wa);
130 }
131 
132 void l2r_lr_fun::Xv(double *v, double *res_Xv)
133 {
134  int32_t l=m_prob->l;
135  int32_t n=m_prob->n;
136  float64_t bias=0;
137 
138  if (m_prob->use_bias)
139  {
140  n--;
141  bias=v[n];
142  }
143 
144  m_prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
145 }
146 
147 void l2r_lr_fun::XTv(double *v, double *res_XTv)
148 {
149  int l=m_prob->l;
150  int32_t n=m_prob->n;
151 
152  memset(res_XTv, 0, sizeof(double)*m_prob->n);
153 
154  if (m_prob->use_bias)
155  n--;
156 
157  for (int32_t i=0;i<l;i++)
158  {
159  m_prob->x->add_to_dense_vec(v[i], i, res_XTv, n);
160 
161  if (m_prob->use_bias)
162  res_XTv[n]+=v[i];
163  }
164 }
165 
166 l2r_l2_svc_fun::l2r_l2_svc_fun(const liblinear_problem *p, double* Cs)
167 {
168  int l=p->l;
169 
170  this->m_prob = p;
171 
172  z = SG_MALLOC(double, l);
173  D = SG_MALLOC(double, l);
174  I = SG_MALLOC(int, l);
175  C=Cs;
176 
177 }
178 
179 l2r_l2_svc_fun::~l2r_l2_svc_fun()
180 {
181  SG_FREE(z);
182  SG_FREE(D);
183  SG_FREE(I);
184 }
185 
/** Objective of L2-regularized L2-loss (squared hinge) SVC:
 *    f(w) = 0.5*||w||^2 + sum_i C[i]*max(1 - y_i (X w)_i, 0)^2
 * Side effect: leaves z[i] = y_i*(X w)_i, consumed by the next grad() call.
 */
double l2r_l2_svc_fun::fun(double *w)
{
	int i;
	double f=0;
	float64_t *y=m_prob->y;
	int l=m_prob->l;
	int w_size=get_nr_variable();

	Xv(w, z);
	for(i=0;i<l;i++)
	{
		// z[i] := margin y_i * (X w)_i, cached for grad()
		z[i] = y[i]*z[i];
		double d = 1-z[i];
		// only margin violations (d > 0) contribute to the squared hinge loss
		if (d > 0)
			f += C[i]*d*d;
	}
	// L2 regularizer; w_wrap borrows w's memory without copying
	SGVector<float64_t> w_wrap(w, w_size, false);
	f += 0.5*linalg::dot(w_wrap, w_wrap);

	return(f);
}
207 
/** Gradient g = w + 2 * X_I^T v, with I = {i : z[i] < 1} the margin violators
 * and v[i] = C[i]*y[i]*(z[i]-1).
 * NOTE(review): assumes z holds y.*(X w) from a preceding fun(w) call.
 * z is compacted in place — safe because sizeI <= i at every step, so an
 * element is never read after being overwritten. I and sizeI are recorded
 * for the subsequent Hv() calls.
 */
void l2r_l2_svc_fun::grad(double *w, double *g)
{
	int i;
	float64_t *y=m_prob->y;
	int l=m_prob->l;
	int w_size=get_nr_variable();

	sizeI = 0;
	for (i=0;i<l;i++)
		if (z[i] < 1)
		{
			// in-place compaction of the active-set loss derivatives
			z[sizeI] = C[i]*y[i]*(z[i]-1);
			I[sizeI] = i;
			sizeI++;
		}
	subXTv(z, g);

	// factor 2 from differentiating the squared hinge; add regularizer grad
	for(i=0;i<w_size;i++)
		g[i] = w[i] + 2*g[i];
}
228 
229 int l2r_l2_svc_fun::get_nr_variable()
230 {
231  return m_prob->n;
232 }
233 
234 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
235 {
236  int i;
237  int l=m_prob->l;
238  int w_size=get_nr_variable();
239  double *wa = SG_MALLOC(double, l);
240 
241  subXv(s, wa);
242  for(i=0;i<sizeI;i++)
243  wa[i] = C[I[i]]*wa[i];
244 
245  subXTv(wa, Hs);
246  for(i=0;i<w_size;i++)
247  Hs[i] = s[i] + 2*Hs[i];
248  SG_FREE(wa);
249 }
250 
251 void l2r_l2_svc_fun::Xv(double *v, double *res_Xv)
252 {
253  int32_t l=m_prob->l;
254  int32_t n=m_prob->n;
255  float64_t bias=0;
256 
257  if (m_prob->use_bias)
258  {
259  n--;
260  bias=v[n];
261  }
262 
263  m_prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
264 }
265 
266 void l2r_l2_svc_fun::subXv(double *v, double *res_Xv)
267 {
268  int32_t n=m_prob->n;
269  float64_t bias=0;
270 
271  if (m_prob->use_bias)
272  {
273  n--;
274  bias=v[n];
275  }
276 
277  m_prob->x->dense_dot_range_subset(I, sizeI, res_Xv, NULL, v, n, bias);
278 
279  /*for (int32_t i=0;i<sizeI;i++)
280  {
281  res_Xv[i]=m_prob->x->dense_dot(I[i], v, n);
282 
283  if (m_prob->use_bias)
284  res_Xv[i]+=bias;
285  }*/
286 }
287 
288 void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
289 {
290  int32_t n=m_prob->n;
291 
292  if (m_prob->use_bias)
293  n--;
294 
295  memset(XTv, 0, sizeof(float64_t)*m_prob->n);
296  for (int32_t i=0;i<sizeI;i++)
297  {
298  m_prob->x->add_to_dense_vec(v[i], I[i], XTv, n);
299 
300  if (m_prob->use_bias)
301  XTv[n]+=v[i];
302  }
303 }
304 
305 l2r_l2_svr_fun::l2r_l2_svr_fun(const liblinear_problem *prob, double *Cs, double p):
306  l2r_l2_svc_fun(prob, Cs)
307 {
308  m_p = p;
309 }
310 
311 double l2r_l2_svr_fun::fun(double *w)
312 {
313  int i;
314  double f=0;
315  double *y=m_prob->y;
316  int l=m_prob->l;
317  int w_size=get_nr_variable();
318  double d;
319 
320  Xv(w, z);
321 
322  for(i=0;i<w_size;i++)
323  f += w[i]*w[i];
324  f /= 2;
325  for(i=0;i<l;i++)
326  {
327  d = z[i] - y[i];
328  if(d < -m_p)
329  f += C[i]*(d+m_p)*(d+m_p);
330  else if(d > m_p)
331  f += C[i]*(d-m_p)*(d-m_p);
332  }
333 
334  return(f);
335 }
336 
/** Gradient g = w + 2 * X_I^T z_I, where I collects the examples whose
 * residual d = (X w)_i - y_i lies outside the [-m_p, m_p] tube and z_I holds
 * the corresponding loss derivatives C[i]*(d +/- m_p).
 * NOTE(review): assumes z holds X*w from a preceding fun(w) call. z is
 * compacted in place (safe: sizeI <= i throughout); I and sizeI feed the
 * inherited subXTv()/Hv() implementations.
 */
void l2r_l2_svr_fun::grad(double *w, double *g)
{
	int i;
	double *y=m_prob->y;
	int l=m_prob->l;
	int w_size=get_nr_variable();
	double d;

	sizeI = 0;
	for(i=0;i<l;i++)
	{
		d = z[i] - y[i];

		// generate index set I
		if(d < -m_p)
		{
			// below the tube: derivative proportional to (d + m_p)
			z[sizeI] = C[i]*(d+m_p);
			I[sizeI] = i;
			sizeI++;
		}
		else if(d > m_p)
		{
			// above the tube: derivative proportional to (d - m_p)
			z[sizeI] = C[i]*(d-m_p);
			I[sizeI] = i;
			sizeI++;
		}

	}
	subXTv(z, g);

	// factor 2 from the squared loss; add the regularizer gradient
	for(i=0;i<w_size;i++)
		g[i] = w[i] + 2*g[i];
}
370 
371 
372 // A coordinate descent algorithm for
373 // multi-class support vector machines by Crammer and Singer
374 //
375 // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
376 // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
377 //
378 // where e^m_i = 0 if y_i = m,
379 // e^m_i = 1 if y_i != m,
380 // C^m_i = C if m = y_i,
381 // C^m_i = 0 if m != y_i,
382 // and w_m(\alpha) = \sum_i \alpha^m_i x_i
383 //
384 // Given:
385 // x, y, C
386 // eps is the stopping tolerance
387 //
388 // solution will be put in w
389 
390 #define GETI(i) (prob->y[i])
391 // To support weights for instances, use GETI(i) (i)
392 
393 Solver_MCSVM_CS::Solver_MCSVM_CS(const liblinear_problem *p, int n_class,
394  double *weighted_C, double *w0_reg,
395  double epsilon, int max_it, double max_time,
396  mcsvm_state* given_state)
397 {
398  this->w_size = p->n;
399  this->l = p->l;
400  this->nr_class = n_class;
401  this->eps = epsilon;
402  this->max_iter = max_it;
403  this->prob = p;
404  this->C = weighted_C;
405  this->w0 = w0_reg;
406  this->max_train_time = max_time;
407  this->state = given_state;
408 }
409 
Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
	// Nothing to release: all working buffers live in the externally owned
	// mcsvm_state, and prob/C/w0 are borrowed pointers.
}
413 
/** qsort() comparator producing DESCENDING order of doubles. */
int compare_double(const void *a, const void *b)
{
	const double lhs = *static_cast<const double*>(a);
	const double rhs = *static_cast<const double*>(b);
	if (lhs > rhs)
		return -1;
	if (lhs < rhs)
		return 1;
	return 0;
}
422 
/** Solve the per-example subproblem of the Crammer-Singer dual over the
 * active_i currently active classes. Inputs: A_i = Q_ii (diagonal entry),
 * yi = position of the true class in the active set, C_yi = its upper bound,
 * state->B = per-class gradient offsets. Output: alpha_new[0..active_i-1].
 * Works on a descending-sorted copy of B to find the threshold beta.
 */
void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
	int r;
	// working copy of state->B so the sort does not disturb solver state
	double *D=SGVector<float64_t>::clone_vector(state->B, active_i);

	// the true class gets its bound folded in before sorting
	if(yi < active_i)
		D[yi] += A_i*C_yi;
	// descending order (compare_double returns -1 for a > b)
	qsort(D, active_i, sizeof(double), compare_double);

	// accumulate top-r entries while beta < r*D[r], then average: this yields
	// the threshold beta of the closed-form subproblem solution
	double beta = D[0] - A_i*C_yi;
	for(r=1;r<active_i && beta<r*D[r];r++)
		beta += D[r];

	beta /= r;
	for(r=0;r<active_i;r++)
	{
		// clip at the upper bound: C_yi for the true class, 0 otherwise
		if(r == yi)
			alpha_new[r] = CMath::min(C_yi, (beta-state->B[r])/A_i);
		else
			alpha_new[r] = CMath::min((double)0, (beta - state->B[r])/A_i);
	}
	SG_FREE(D);
}
446 
447 bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
448 {
449  double bound = 0;
450  if(m == yi)
451  bound = C[int32_t(GETI(i))];
452  if(alpha_i == bound && state->G[m] < minG)
453  return true;
454  return false;
455 }
456 
/** Sequential dual coordinate-descent solver for the Crammer-Singer
 * multi-class SVM (the formulation is stated in the comment block above).
 * Each outer iteration sweeps the shuffled active examples; for each one it
 * builds the per-class gradient G, shrinks classes at their bounds, solves
 * the closed-form subproblem, and applies the sparse alpha update to w.
 * All buffers live in the externally supplied mcsvm_state so training can be
 * resumed (warm start) across calls. Result accumulates in state->w.
 */
void Solver_MCSVM_CS::solve()
{
	int i, m, s, k;
	int iter = 0;
	double *w,*B,*G,*alpha,*alpha_new,*QD,*d_val;
	int *index,*d_ind,*alpha_index,*y_index,*active_size_i;

	// Lazily allocate state on first use; a warm-started solver reuses it.
	if (!state->allocated)
	{
		state->w = SG_CALLOC(double, nr_class*w_size);
		state->B = SG_CALLOC(double, nr_class);
		state->G = SG_CALLOC(double, nr_class);
		state->alpha = SG_CALLOC(double, l*nr_class);
		state->alpha_new = SG_CALLOC(double, nr_class);
		state->index = SG_CALLOC(int, l);
		state->QD = SG_CALLOC(double, l);
		state->d_ind = SG_CALLOC(int, nr_class);
		state->d_val = SG_CALLOC(double, nr_class);
		state->alpha_index = SG_CALLOC(int, nr_class*l);
		state->y_index = SG_CALLOC(int, l);
		state->active_size_i = SG_CALLOC(int, l);
		state->allocated = true;
	}
	// local aliases into the state buffers
	w = state->w;
	B = state->B;
	G = state->G;
	alpha = state->alpha;
	alpha_new = state->alpha_new;
	index = state->index;
	QD = state->QD;
	d_ind = state->d_ind;
	d_val = state->d_val;
	alpha_index = state->alpha_index;
	y_index = state->y_index;
	active_size_i = state->active_size_i;

	// dense scratch copy of one example's feature vector
	double* tx = SG_MALLOC(double, w_size);
	int dim = prob->x->get_dim_feature_space();

	int active_size = l;
	double eps_shrink = CMath::max(10.0*eps, 1.0); // stopping tolerance for shrinking
	bool start_from_all = true;
	CTime start_time;
	// initial
	if (!state->inited)
	{
		for(i=0;i<l;i++)
		{
			for(m=0;m<nr_class;m++)
				alpha_index[i*nr_class+m] = m;

			// QD[i] = Q_ii = x_i . x_i (+1 for the implicit bias feature)
			QD[i] = prob->x->dot(i, prob->x,i);
			if (prob->use_bias)
				QD[i] += 1.0;

			active_size_i[i] = nr_class;
			y_index[i] = prob->y[i];
			index[i] = i;
		}
		state->inited = true;
	}

	// TODO: replace with the new signal
	// while(iter < max_iter && !CSignal::cancel_computations())
	while (iter < max_iter)
	{
		double stopping = -CMath::INFTY;
		// Fisher-Yates shuffle of the active examples for this sweep
		for(i=0;i<active_size;i++)
		{
			int j = CMath::random(i, active_size-1);
			CMath::swap(index[i], index[j]);
		}
		for(s=0;s<active_size;s++)
		{
			i = index[s];
			double Ai = QD[i];
			double *alpha_i = &alpha[i*nr_class];
			int *alpha_index_i = &alpha_index[i*nr_class];

			// Ai == 0 means x_i is a zero vector (no bias): nothing to update
			if(Ai > 0)
			{
				// G[m] = gradient for active class m; starts from the loss
				// term e^m_i (1 for m != y_i, 0 for the true class)
				for(m=0;m<active_size_i[i];m++)
					G[m] = 1;
				if(y_index[i] < active_size_i[i])
					G[y_index[i]] = 0;

				// densify x_i into tx, then accumulate w_m . x_i into G
				memset(tx,0,dim*sizeof(double));
				prob->x->add_to_dense_vec(1.0,i,tx,dim);
				for (k=0; k<dim; k++)
				{
					if (tx[k]==0.0)
						continue;

					double* w_i = &w[k*nr_class];
					for (m=0; m<active_size_i[i]; m++)
						G[m] += w_i[alpha_index_i[m]]*tx[k];
				}

				// experimental
				// ***
				// bias row of w contributes once per class (implicit 1-feature)
				if (prob->use_bias)
				{
					double *w_i = &w[(w_size-1)*nr_class];
					for(m=0; m<active_size_i[i]; m++)
						G[m] += w_i[alpha_index_i[m]];
				}
				// optional fixed regularization offset w0 added to the gradient
				if (w0)
				{
					for (k=0; k<dim; k++)
					{
						double *w0_i = &w0[k*nr_class];
						for(m=0; m<active_size_i[i]; m++)
							G[m] += w0_i[alpha_index_i[m]];
					}
				}
				// ***

				// min/max gradient over classes whose alpha is not at bound
				double minG = CMath::INFTY;
				double maxG = -CMath::INFTY;
				for(m=0;m<active_size_i[i];m++)
				{
					if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
						minG = G[m];
					if(G[m] > maxG)
						maxG = G[m];
				}
				if(y_index[i] < active_size_i[i])
					if(alpha_i[int32_t(prob->y[i])] < C[int32_t(GETI(i))] && G[y_index[i]] < minG)
						minG = G[y_index[i]];

				// shrink bounded classes: swap them past the active prefix,
				// keeping y_index consistent with the reordering
				for(m=0;m<active_size_i[i];m++)
				{
					if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
					{
						active_size_i[i]--;
						while(active_size_i[i]>m)
						{
							if(!be_shrunk(i, active_size_i[i], y_index[i],
											alpha_i[alpha_index_i[active_size_i[i]]], minG))
							{
								CMath::swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
								CMath::swap(G[m], G[active_size_i[i]]);
								if(y_index[i] == active_size_i[i])
									y_index[i] = m;
								else if(y_index[i] == m)
									y_index[i] = active_size_i[i];
								break;
							}
							active_size_i[i]--;
						}
					}
				}

				// one active class left: drop the whole example from this sweep
				if(active_size_i[i] <= 1)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}

				// (near-)optimal example: skip; otherwise track the violation
				if(maxG-minG <= 1e-12)
					continue;
				else
					// NOTE(review): CMath::CMath::max is legal (injected class
					// name) but the double qualification looks accidental
					stopping = CMath::CMath::max(maxG - minG, stopping);

				// B[m] = G[m] - Q_ii * alpha^m_i feeds the subproblem solver
				for(m=0;m<active_size_i[i];m++)
					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;

				solve_sub_problem(Ai, y_index[i], C[int32_t(GETI(i))], active_size_i[i], alpha_new);
				// collect the classes whose alpha actually changed
				int nz_d = 0;
				for(m=0;m<active_size_i[i];m++)
				{
					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
					alpha_i[alpha_index_i[m]] = alpha_new[m];
					if(fabs(d) >= 1e-12)
					{
						d_ind[nz_d] = alpha_index_i[m];
						d_val[nz_d] = d;
						nz_d++;
					}
				}

				// rank-one update of w: w_m += d_m * x_i for changed classes
				memset(tx,0,dim*sizeof(double));
				prob->x->add_to_dense_vec(1.0,i,tx,dim);
				for (k=0; k<dim; k++)
				{
					if (tx[k]==0.0)
						continue;

					double* w_i = &w[k*nr_class];
					for (m=0; m<nz_d; m++)
						w_i[d_ind[m]] += d_val[m]*tx[k];
				}
				// experimental
				// ***
				// same update for the bias row (implicit 1-feature)
				if (prob->use_bias)
				{
					double *w_i = &w[(w_size-1)*nr_class];
					for(m=0;m<nz_d;m++)
						w_i[d_ind[m]] += d_val[m];
				}
				// ***
			}
		}

		iter++;
		/*
		if(iter % 10 == 0)
		{
			SG_SINFO(".")
		}
		*/

		// converged w.r.t. the (relaxed) shrinking tolerance?
		if(stopping < eps_shrink)
		{
			// truly converged only if the last sweep covered ALL examples
			if(stopping < eps && start_from_all == true)
				break;
			else
			{
				// un-shrink everything, tighten the tolerance, and re-sweep
				active_size = l;
				for(i=0;i<l;i++)
					active_size_i[i] = nr_class;
				//SG_SINFO("*")
				eps_shrink = CMath::max(eps_shrink/2, eps);
				start_from_all = true;
			}
		}
		else
			start_from_all = false;

		// optional wall-clock budget
		if (max_train_time!=0.0 && max_train_time < start_time.cur_time_diff())
			break;
	}

	SG_SINFO("\noptimization finished, #iter = %d\n",iter)
	if (iter >= max_iter)
		SG_SINFO("Warning: reaching max number of iterations\n")

	SG_FREE(tx);
}
698 
699 //
700 // Interface functions
701 //
702 
703 void destroy_model(struct liblinear_model *model_)
704 {
705  SG_FREE(model_->w);
706  SG_FREE(model_->label);
707  SG_FREE(model_);
708 }
709 
710 void destroy_param(liblinear_parameter* param)
711 {
712  SG_FREE(param->weight_label);
713  SG_FREE(param->weight);
714 }
715 #endif // DOXYGEN_SHOULD_SKIP_THIS
Class Time that implements a stopwatch based on either cpu time or wall clock time.
Definition: Time.h:42
static T * clone_vector(const T *vec, int32_t len)
Definition: SGVector.cpp:271
static T max(T a, T b)
Definition: Math.h:149
static const float64_t INFTY
infinity
Definition: Math.h:1868
T dot(const SGVector< T > &a, const SGVector< T > &b)
static uint64_t random()
Definition: Math.h:811
float64_t cur_time_diff(bool verbose=false)
Definition: Time.cpp:68
#define GETI(i)
Definition: LibLinear.cpp:1184
double float64_t
Definition: common.h:60
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
#define SG_SINFO(...)
Definition: SGIO.h:158
static void swap(T &a, T &b)
Definition: Math.h:406
T max(const Container< T > &a)
static T min(T a, T b)
Definition: Math.h:138

SHOGUN Machine Learning Toolbox - Documentation