SHOGUN 6.1.3
DotFeatures.cpp
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2009 Soeren Sonnenburg
8  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
11 #include <shogun/base/Parallel.h>
12 #include <shogun/base/Parameter.h>
13 #include <shogun/base/progress.h>
14 #include <shogun/features/DotFeatures.h>
15 #include <shogun/io/SGIO.h>
16 #include <shogun/lib/Signal.h>
17 #include <shogun/lib/Time.h>
18 #include <shogun/mathematics/Math.h>
19 #include <shogun/mathematics/linalg/LinalgNamespace.h>
20 
21 #ifdef HAVE_OPENMP
22 #include <omp.h>
23 #endif
24 
25 using namespace shogun;
26 
27 
28 CDotFeatures::CDotFeatures(int32_t size)
29  :CFeatures(size), combined_weight(1.0)
30 {
31  init();
32 }
33 
34 
35 CDotFeatures::CDotFeatures(const CDotFeatures& orig)
36  :CFeatures(orig), combined_weight(orig.combined_weight)
37 {
38  init();
39 }
40 
41 
42 CDotFeatures::CDotFeatures(CFile* loader)
43  :CFeatures(loader)
44 {
45  init();
46 }
47 
48 float64_t CDotFeatures::dense_dot_sgvec(int32_t vec_idx1, const SGVector<float64_t> vec2)
49 {
50  return dense_dot(vec_idx1, vec2.vector, vec2.vlen);
51 }
52 
53 void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
54 {
55  ASSERT(output)
56  // write access is internally between output[start..stop] so the following
57  // line is necessary to write to output[0...(stop-start-1)]
58  output-=start;
59  ASSERT(start>=0)
60  ASSERT(start<stop)
61  ASSERT(stop<=get_num_vectors())
62 
63  int32_t num_vectors=stop-start;
64  ASSERT(num_vectors>0)
65 
66  int32_t num_threads;
67  int32_t step;
68  auto pb = progress(range(num_vectors), *this->io);
69 #pragma omp parallel shared(num_threads, step)
70  {
71 #ifdef HAVE_OPENMP
72  #pragma omp single
73  {
74  num_threads=omp_get_num_threads();
75  step=num_vectors/num_threads;
76  num_threads--;
77  }
78  int32_t thread_num=omp_get_thread_num();
79 #else
80  num_threads=0;
81  step=num_vectors;
82  int32_t thread_num=0;
83 #endif
84 
85  int32_t t_start=thread_num*step;
86  int32_t t_stop=(thread_num==num_threads) ? stop : (thread_num+1)*step;
87 
88 #ifdef WIN32
89  for (int32_t i=t_start; i<t_stop; i++)
90 #else
91  // TODO: replace with the new signal
92  // for (int32_t i=t_start; i<t_stop &&
93  // !CSignal::cancel_computations(); i++)
94  for (int32_t i = t_start; i < t_stop; i++)
95 #endif
96  {
97  if (alphas)
98  output[i]=alphas[i]*this->dense_dot(i, vec, dim)+b;
99  else
100  output[i]=this->dense_dot(i, vec, dim)+b;
101  pb.print_progress();
102  }
103  }
104  pb.complete();
105 }
106 
107 void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
108 {
109  ASSERT(sub_index)
110  ASSERT(output)
111 
112  auto pb = progress(range(num), *this->io);
113  int32_t num_threads;
114  int32_t step;
115  #pragma omp parallel shared(num_threads, step)
116  {
117 #ifdef HAVE_OPENMP
118  #pragma omp single
119  {
120  num_threads=omp_get_num_threads();
121  step=num/num_threads;
122  num_threads--;
123  }
124  int32_t thread_num=omp_get_thread_num();
125 #else
126  num_threads=0;
127  step = num;
128  int32_t thread_num=0;
129 #endif
130 
131  int32_t t_start=thread_num*step;
132  int32_t t_stop=(thread_num==num_threads) ? num : (thread_num+1)*step;
133 
134 #ifdef WIN32
135  for (int32_t i=t_start; i<t_stop; i++)
136 #else
137  // TODO: replace with the new signal
138  // for (int32_t i=t_start; i<t_stop &&
139  // !CSignal::cancel_computations(); i++)
140  for (int32_t i = t_start; i < t_stop; i++)
141 #endif
142  {
143  if (alphas)
144  output[i]=alphas[sub_index[i]]*this->dense_dot(sub_index[i], vec, dim)+b;
145  else
146  output[i]=this->dense_dot(sub_index[i], vec, dim)+b;
147  pb.print_progress();
148  }
149  }
150  pb.complete();
151 }
152 
153 SGMatrix<float64_t> CDotFeatures::get_computed_dot_feature_matrix()
154 {
155 
156  int64_t offs=0;
157  int32_t num=get_num_vectors();
158  int32_t dim=get_dim_feature_space();
159  ASSERT(num>0)
160  ASSERT(dim>0)
161 
162  SGMatrix<float64_t> m(dim, num);
163  m.zero();
164 
165  for (int32_t i=0; i<num; i++)
166  {
167  add_to_dense_vec(1.0, i, &(m.matrix[offs]), dim);
168  offs+=dim;
169  }
170 
171  return m;
172 }
173 
174 SGVector<float64_t> CDotFeatures::get_computed_dot_feature_vector(int32_t num)
175 {
176 
177  int32_t dim=get_dim_feature_space();
178  ASSERT(num>=0 && num<=get_num_vectors())
179  ASSERT(dim>0)
180 
181  SGVector<float64_t> v(dim);
182  v.zero();
183  add_to_dense_vec(1.0, num, v.vector, dim);
184  return v;
185 }
186 
187 void CDotFeatures::benchmark_add_to_dense_vector(int32_t repeats)
188 {
189  int32_t num=get_num_vectors();
190  int32_t d=get_dim_feature_space();
191  float64_t* w= SG_MALLOC(float64_t, d);
192  SGVector<float64_t>::fill_vector(w, d, 0.0);
193 
194  CTime t;
195  float64_t start_cpu=t.get_runtime();
196  float64_t start_wall=t.get_curtime();
197  for (int32_t r=0; r<repeats; r++)
198  {
199  for (int32_t i=0; i<num; i++)
200  add_to_dense_vec(1.172343*(r+1), i, w, d);
201  }
202 
203  SG_PRINT("Time to process %d x num=%d add_to_dense_vector ops: cputime %fs walltime %fs\n",
204  repeats, num, (t.get_runtime()-start_cpu)/repeats,
205  (t.get_curtime()-start_wall)/repeats);
206 
207  SG_FREE(w);
208 }
209 
210 void CDotFeatures::benchmark_dense_dot_range(int32_t repeats)
211 {
212  int32_t num=get_num_vectors();
213  int32_t d=get_dim_feature_space();
214  float64_t* w= SG_MALLOC(float64_t, d);
215  float64_t* out= SG_MALLOC(float64_t, num);
216  float64_t* alphas= SG_MALLOC(float64_t, num);
217  SGVector<float64_t>::range_fill_vector(w, d, 17.0);
218  SGVector<float64_t>::range_fill_vector(alphas, num, 1.2345);
219  //SGVector<float64_t>::fill_vector(w, d, 17.0);
220  //SGVector<float64_t>::fill_vector(alphas, num, 1.2345);
221 
222  CTime t;
223  float64_t start_cpu=t.get_runtime();
224  float64_t start_wall=t.get_curtime();
225 
226  for (int32_t r=0; r<repeats; r++)
227  dense_dot_range(out, 0, num, alphas, w, d, 23);
228 
229 #ifdef DEBUG_DOTFEATURES
230  CMath::display_vector(out, 40, "dense_dot_range");
231  float64_t* out2= SG_MALLOC(float64_t, num);
232 
233  for (int32_t r=0; r<repeats; r++)
234  {
235  CMath::fill_vector(out2, num, 0.0);
236  for (int32_t i=0; i<num; i++)
237  out2[i]+=dense_dot(i, w, d)*alphas[i]+23;
238  }
239  CMath::display_vector(out2, 40, "dense_dot");
240  for (int32_t i=0; i<num; i++)
241  out2[i]-=out[i];
242  CMath::display_vector(out2, 40, "diff");
243 #endif
244  SG_PRINT("Time to process %d x num=%d dense_dot_range ops: cputime %fs walltime %fs\n",
245  repeats, num, (t.get_runtime()-start_cpu)/repeats,
246  (t.get_curtime()-start_wall)/repeats);
247 
248  SG_FREE(alphas);
249  SG_FREE(out);
250  SG_FREE(w);
251 }
252 
253 SGVector<float64_t> CDotFeatures::get_mean()
254 {
255  int32_t num=get_num_vectors();
256  int32_t dim=get_dim_feature_space();
257  ASSERT(num>0)
258  ASSERT(dim>0)
259 
260  SGVector<float64_t> mean(dim);
261  linalg::zero(mean);
262 
263  for (int32_t i = 0; i < num; ++i)
264  add_to_dense_vec(1, i, mean.vector, dim);
265 
266  linalg::scale(mean, mean, 1.0 / num);
267 
268  return mean;
269 }
270 
271 SGVector<float64_t> CDotFeatures::compute_mean(
272  CDotFeatures* lhs, CDotFeatures* rhs)
273 {
274  ASSERT(lhs && rhs)
275  ASSERT(lhs->get_dim_feature_space()==rhs->get_dim_feature_space())
276 
277  int32_t num_lhs=lhs->get_num_vectors();
278  int32_t num_rhs=rhs->get_num_vectors();
279  int32_t dim=lhs->get_dim_feature_space();
280  ASSERT(num_lhs>0)
281  ASSERT(num_rhs>0)
282  ASSERT(dim>0)
283 
284  SGVector<float64_t> mean(dim);
285  linalg::zero(mean);
286 
287  for (int i = 0; i < num_lhs; i++)
288  lhs->add_to_dense_vec(1, i, mean.vector, dim);
289 
290  for (int i = 0; i < num_rhs; i++)
291  rhs->add_to_dense_vec(1, i, mean.vector, dim);
292 
293  linalg::scale(mean, mean, 1.0 / (num_lhs + num_rhs));
294 
295  return mean;
296 }
297 
298 SGMatrix<float64_t> CDotFeatures::get_cov(bool copy_data_for_speed)
299 {
300  int32_t num=get_num_vectors();
301  int32_t dim=get_dim_feature_space();
302  ASSERT(num>0)
303  ASSERT(dim>0)
304 
305  SGMatrix<float64_t> cov(dim, dim);
306  SGVector<float64_t> mean = get_mean();
307 
308  if (copy_data_for_speed)
309  {
310  SGMatrix<float64_t> centered_data(dim, num);
311  for (int i = 0; i < num; i++)
312  {
313  SGVector<float64_t> v = get_computed_dot_feature_vector(i);
314  centered_data.set_column(i, linalg::add(v, mean, 1.0, -1.0));
315  }
316 
317  cov = linalg::matrix_prod(centered_data, centered_data, false, true);
318  }
319  else
320  {
321  linalg::zero(cov);
322  for (int i = 0; i < num; i++)
323  {
324  SGVector<float64_t> v = get_computed_dot_feature_vector(i);
325  linalg::add(v, mean, v, 1.0, -1.0);
326  for (int m = 0; m < v.vlen; m++)
327  linalg::add_col_vec(cov, m, v, cov, 1.0, v.vector[m]);
328  }
329  for (int m = 0; m < dim - 1; m++)
330  {
331  for (int n = m + 1; n < dim; n++)
332  {
333  (cov.matrix)[m * dim + n] = (cov.matrix)[n * dim + m];
334  }
335  }
336  }
337  linalg::scale(cov, cov, 1.0 / num);
338 
339  return cov;
340 }
341 
342 SGMatrix<float64_t> CDotFeatures::compute_cov(
343  CDotFeatures* lhs, CDotFeatures* rhs, bool copy_data_for_speed)
344 {
345  CDotFeatures* feats[2];
346  feats[0]=lhs;
347  feats[1]=rhs;
348 
349  int32_t nums[2], dims[2], num=0;
350 
351  for (int i = 0; i < 2; i++)
352  {
353  nums[i]=feats[i]->get_num_vectors();
354  dims[i]=feats[i]->get_dim_feature_space();
355  ASSERT(nums[i]>0)
356  ASSERT(dims[i]>0)
357  num += nums[i];
358  }
359 
360  ASSERT(dims[0]==dims[1])
361  int32_t dim = dims[0];
362 
363  SGMatrix<float64_t> cov(dim, dim);
364  SGVector<float64_t> mean = compute_mean(lhs, rhs);
365 
366  if (copy_data_for_speed)
367  {
368  SGMatrix<float64_t> centered_data(dim, num);
369  for (int i = 0; i < num; i++)
370  {
371  SGVector<float64_t> v =
372  i < nums[0] ? lhs->get_computed_dot_feature_vector(i)
373  : rhs->get_computed_dot_feature_vector(i - nums[0]);
374 
375  centered_data.set_column(i, linalg::add(v, mean, 1.0, -1.0));
376  }
377 
378  cov = linalg::matrix_prod(centered_data, centered_data, false, true);
379  }
380  else
381  {
382  linalg::zero(cov);
383  for (int i = 0; i < 2; i++)
384  {
385  for (int j = 0; j < nums[i]; j++)
386  {
387  SGVector<float64_t> v =
388  feats[i]->get_computed_dot_feature_vector(j);
389  linalg::add(v, mean, v, 1.0, -1.0);
390  for (int m = 0; m < v.vlen; m++)
391  linalg::add_col_vec(cov, m, v, cov, 1.0, v.vector[m]);
392  }
393  }
394 
395  for (int m = 0; m < dim - 1; m++)
396  {
397  for (int n = m + 1; n < dim; n++)
398  {
399  (cov.matrix)[m * dim + n] = (cov.matrix)[n * dim + m];
400  }
401  }
402  }
403  linalg::scale(cov, cov, 1.0 / num);
404 
405  return cov;
406 }
407 
408 void CDotFeatures::init()
409 {
410  set_property(FP_DOT);
411  m_parameters->add(&combined_weight, "combined_weight",
412  "Feature weighting in combined dot features.");
413 }
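The functions above are implemented purely against the CDotFeatures interface; a concrete subclass only has to supply dense_dot(), add_to_dense_vec(), get_num_vectors() and get_dim_feature_space(). As a usage illustration, here is a minimal sketch (not part of DotFeatures.cpp) that drives dense_dot_range() through CDenseFeatures<float64_t>, a dense implementation of this interface from shogun/features/DenseFeatures.h; the toy data and variable names are illustrative only.

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/SGVector.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // toy data: 3-dimensional feature space, 4 vectors stored column-wise
    SGMatrix<float64_t> data(3, 4);
    SGVector<float64_t>::range_fill_vector(data.matrix, 3 * 4, 1.0);

    CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(data);
    SG_REF(feats);

    // dense argument of the dot products
    SGVector<float64_t> w(3);
    w.set_const(1.0);

    // out[i] = dense_dot(i, w, dim) + b for every vector, possibly in parallel;
    // passing NULL for alphas selects the unweighted branch of the loop above
    SGVector<float64_t> out(feats->get_num_vectors());
    feats->dense_dot_range(out.vector, 0, feats->get_num_vectors(),
        NULL, w.vector, w.vlen, 0.0);

    out.display_vector("out");

    SG_UNREF(feats);
    exit_shogun();
    return 0;
}

With alphas set to NULL the call reduces to out[i] = dense_dot(i, w, dim) + b, which is exactly the else branch of the per-thread loop in dense_dot_range().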

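For reference, get_cov() and compute_cov() accumulate the scatter of the feature vectors around their mean and scale the result by 1.0/num, i.e. the covariance is normalized by N rather than N-1. The sketch below is a hypothetical helper (not library code) that recomputes the same quantity naively through the public CDotFeatures interface; it can serve as a cross-check for the faster copy_data_for_speed path.

#include <shogun/features/DotFeatures.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/SGVector.h>

using namespace shogun;

// hypothetical helper, not part of the library: recompute get_cov(true)
// element by element via the public CDotFeatures interface
SGMatrix<float64_t> naive_cov(CDotFeatures* f)
{
    int32_t num = f->get_num_vectors();
    int32_t dim = f->get_dim_feature_space();
    SGVector<float64_t> mu = f->get_mean();

    SGMatrix<float64_t> C(dim, dim);
    C.zero();

    // C(m, n) = 1/N * sum_i (x_i[m] - mu[m]) * (x_i[n] - mu[n])
    for (int32_t i = 0; i < num; i++)
    {
        SGVector<float64_t> x = f->get_computed_dot_feature_vector(i);
        for (int32_t m = 0; m < dim; m++)
            for (int32_t n = 0; n < dim; n++)
                C(m, n) += (x[m] - mu[m]) * (x[n] - mu[n]) / num;
    }
    return C;
}

Calling naive_cov(feats) on the CDenseFeatures object from the previous sketch should agree with feats->get_cov() up to floating-point rounding.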