SHOGUN 6.1.3
LibLinearRegression.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Copyright (C) 2012 Soeren Sonnenburg
 */

#include <shogun/lib/config.h>
#ifdef HAVE_LAPACK
#include <shogun/base/progress.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/optimization/liblinear/tron.h>
#include <shogun/regression/svr/LibLinearRegression.h>

using namespace shogun;

CLibLinearRegression::CLibLinearRegression() :
    CLinearMachine()
{
    init_defaults();
}

CLibLinearRegression::CLibLinearRegression(float64_t C, CDotFeatures* feats, CLabels* labs) :
    CLinearMachine()
{
    init_defaults();
    set_C(C);
    set_features(feats);
    set_labels(labs);
}
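
// Minimal usage sketch (illustrative, not part of the original file):
// train an epsilon-SVR model on dense features. The data setup below is
// an assumed example; only the CLibLinearRegression calls are fixed API.
//
//   SGMatrix<float64_t> X(num_feat, num_vec);   // one example per column
//   SGVector<float64_t> y(num_vec);             // regression targets
//   auto* feats = new CDenseFeatures<float64_t>(X);
//   auto* labs = new CRegressionLabels(y);
//   auto* svr = new CLibLinearRegression(1.0, feats, labs);
//   svr->train();
//   CRegressionLabels* preds = svr->apply_regression(feats);
//   SG_UNREF(preds);
//   SG_UNREF(svr);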

void CLibLinearRegression::init_defaults()
{
    set_C(1.0);
    set_epsilon(1e-2);
    set_max_iter(10000);
    set_use_bias(false);
    set_liblinear_regression_type(L2R_L2LOSS_SVR_DUAL);
}

void CLibLinearRegression::register_parameters()
{
    SG_ADD(&m_C, "m_C", "regularization constant", MS_AVAILABLE);
    SG_ADD(&m_epsilon, "m_epsilon", "tolerance epsilon", MS_NOT_AVAILABLE);
    SG_ADD(&m_tube_epsilon, "m_tube_epsilon", "svr tube epsilon", MS_AVAILABLE);
    SG_ADD(&m_max_iter, "m_max_iter", "max number of iterations", MS_NOT_AVAILABLE);
    SG_ADD(&m_use_bias, "m_use_bias", "indicates whether bias should be used", MS_NOT_AVAILABLE);
}
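
// Parameters registered with MS_AVAILABLE (here C and the tube epsilon)
// are exposed as candidates for Shogun's automatic model selection, e.g.
// a grid search; MS_NOT_AVAILABLE parameters are registered only for
// serialization and introspection.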

CLibLinearRegression::~CLibLinearRegression()
{
}

bool CLibLinearRegression::train_machine(CFeatures* data)
{
    if (data)
        set_features((CDotFeatures*)data);

    ASSERT(features)
    ASSERT(m_labels && m_labels->get_label_type()==LT_REGRESSION)

    int32_t num_train_labels=m_labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (num_vec!=num_train_labels)
    {
        SG_ERROR("number of vectors %d does not match "
                "number of training labels %d\n",
                num_vec, num_train_labels);
    }

    SGVector<float64_t> w;
    if (get_use_bias())
        w=SGVector<float64_t>(SG_MALLOC(float64_t, num_feat+1), num_feat);
    else
        w=SGVector<float64_t>(num_feat);

    liblinear_problem prob;
    if (get_use_bias())
    {
        prob.n=w.vlen+1;
        memset(w.vector, 0, sizeof(float64_t)*(w.vlen+1));
    }
    else
    {
        prob.n=w.vlen;
        memset(w.vector, 0, sizeof(float64_t)*w.vlen);
    }
    prob.l=num_vec;
    prob.x=features;
    prob.use_bias=get_use_bias();
    prob.y=SG_MALLOC(float64_t, prob.l);

    // copy the regression targets; prob.y is read by all solver paths
    for (int32_t i=0; i<prob.l; i++)
        prob.y[i]=((CRegressionLabels*)m_labels)->get_label(i);

    switch (m_liblinear_regression_type)
    {
        case L2R_L2LOSS_SVR:
        {
            double* Cs = SG_MALLOC(double, prob.l);
            for (int i=0; i<prob.l; i++)
                Cs[i] = get_C();

            function* fun_obj=new l2r_l2_svr_fun(&prob, Cs, get_tube_epsilon());
            CTron tron_obj(fun_obj, get_epsilon());
            tron_obj.tron(w.vector, m_max_train_time);
            delete fun_obj;
            SG_FREE(Cs);
            break;
        }
        case L2R_L1LOSS_SVR_DUAL:
        case L2R_L2LOSS_SVR_DUAL:
            solve_l2r_l1l2_svr(w, &prob);
            break;
        default:
            SG_ERROR("Error: unknown regression type\n")
            break;
    }

    SG_FREE(prob.y);
    set_w(w);

    return true;
}
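
// Note on solver selection: L2R_L2LOSS_SVR above solves the primal
// problem with a trust region Newton method (CTron), while both dual
// types share the coordinate descent solver below and differ only in
// the lambda and upper_bound constants chosen there.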

// A coordinate descent algorithm for
// L1-loss and L2-loss epsilon-SVR dual problem
//
//  min_\beta  0.5\beta^T (Q + diag(lambda)) \beta + p \sum_{i=1}^l |\beta_i| - \sum_{i=1}^l y_i\beta_i,
//    s.t.     -upper_bound_i <= \beta_i <= upper_bound_i,
//
// where Q_ij = x_i^T x_j and diag(lambda) adds lambda_i to the i-th
// diagonal entry of Q
//
// In L1-SVM case:
//     upper_bound_i = C
//     lambda_i = 0
// In L2-SVM case:
//     upper_bound_i = INF
//     lambda_i = 1/(2*C)
//
// Given:
// x, y, p, C
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 4 of Ho and Lin, 2012

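// Sketch of the per-coordinate update used below: with G the gradient of
// the smooth part at beta_i and H = Q_ii + lambda_i its second derivative,
// the one-variable subproblem is
//
//   min_d  0.5*H*d^2 + G*d + p*|beta_i + d|
//
// Writing Gp = G + p and Gn = G - p, its closed-form minimizer is
//
//   d = -Gp/H     if Gp < H*beta_i   (beta_i + d remains positive)
//   d = -Gn/H     if Gn > H*beta_i   (beta_i + d remains negative)
//   d = -beta_i   otherwise          (beta_i + d lands exactly at 0)
//
// followed by clipping beta_i + d into [-upper_bound, upper_bound].
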
#undef GETI
#define GETI(i) (0)
// To support weights for instances, use GETI(i) (i)

void CLibLinearRegression::solve_l2r_l1l2_svr(SGVector<float64_t>& w, const liblinear_problem* prob)
{
    int l = prob->l;
    double C = get_C();
    double p = get_tube_epsilon();
    int w_size = prob->n;
    double eps = get_epsilon();
    int i, s, iter = 0;
    int max_iter = m_max_iter;
    int active_size = l;
    int* index = new int[l];

    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new, Gnorm1_new;
    double Gnorm1_init = 0.0;
    double* beta = new double[l];
    double* QD = new double[l];
    double* y = prob->y;

    // L2R_L2LOSS_SVR_DUAL
    double lambda[1], upper_bound[1];
    lambda[0] = 0.5/C;
    upper_bound[0] = CMath::INFTY;

    if (m_liblinear_regression_type == L2R_L1LOSS_SVR_DUAL)
    {
        lambda[0] = 0;
        upper_bound[0] = C;
    }

    // Initial beta can be set here. Note that
    // -upper_bound <= beta[i] <= upper_bound
    for (i=0; i<l; i++)
        beta[i] = 0;

    for (i=0; i<w_size; i++)
        w[i] = 0;
    for (i=0; i<l; i++)
    {
        QD[i] = prob->x->dot(i, prob->x, i);
        prob->x->add_to_dense_vec(beta[i], i, w.vector, w_size);

        if (prob->use_bias)
            w.vector[w_size]+=beta[i];

        index[i] = i;
    }

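    // Shrinking heuristic: coordinates whose projected gradient shows they
    // will stay at a bound (or at zero) are swapped behind active_size and
    // skipped; once the stopping criterion holds on the shrunken set, all
    // coordinates are re-activated for one confirming pass.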
    auto pb = progress(range(10));
    while (iter < max_iter)
    {
        Gmax_new = 0;
        Gnorm1_new = 0;

        // randomly permute the active coordinates
        for (i=0; i<active_size; i++)
        {
            int j = CMath::random(i, active_size-1);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            i = index[s];
            G = -y[i] + lambda[GETI(i)]*beta[i];
            H = QD[i] + lambda[GETI(i)];

            G += prob->x->dense_dot(i, w.vector, w_size);
            if (prob->use_bias)
                G += w.vector[w_size];

            double Gp = G+p;
            double Gn = G-p;
            double violation = 0;
            if (beta[i] == 0)
            {
                if (Gp < 0)
                    violation = -Gp;
                else if (Gn > 0)
                    violation = Gn;
                else if (Gp>Gmax_old && Gn<-Gmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if (beta[i] >= upper_bound[GETI(i)])
            {
                if (Gp > 0)
                    violation = Gp;
                else if (Gp < -Gmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if (beta[i] <= -upper_bound[GETI(i)])
            {
                if (Gn < 0)
                    violation = -Gn;
                else if (Gn > Gmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if (beta[i] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);
            Gnorm1_new += violation;

            // obtain Newton direction d
            if (Gp < H*beta[i])
                d = -Gp/H;
            else if (Gn > H*beta[i])
                d = -Gn/H;
            else
                d = -beta[i];

            if (fabs(d) < 1.0e-12)
                continue;

            double beta_old = beta[i];
            beta[i] = CMath::min(CMath::max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
            d = beta[i]-beta_old;

            if (d != 0)
            {
                prob->x->add_to_dense_vec(d, i, w.vector, w_size);

                if (prob->use_bias)
                    w.vector[w_size] += d;
            }
        }

        if (iter == 0)
            Gnorm1_init = Gnorm1_new;
        iter++;

        pb.print_absolute(
            Gnorm1_new, -CMath::log10(Gnorm1_new),
            -CMath::log10(eps * Gnorm1_init), -CMath::log10(Gnorm1_init));

        if (Gnorm1_new <= eps*Gnorm1_init)
        {
            if (active_size == l)
                break;
            else
            {
                active_size = l;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    pb.complete_absolute();
    SG_INFO("\noptimization finished, #iter = %d\n", iter)
    if (iter >= max_iter)
        SG_INFO("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n")

    // calculate objective value
    double v = 0;
    int nSV = 0;
    for (i=0; i<w_size; i++)
        v += w[i]*w[i];
    v = 0.5*v;
    for (i=0; i<l; i++)
    {
        v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
        if (beta[i] != 0)
            nSV++;
    }

    SG_INFO("Objective value = %lf\n", v)
    SG_INFO("nSV = %d\n", nSV)

    delete[] beta;
    delete[] QD;
    delete[] index;
}

#endif /* HAVE_LAPACK */