SHOGUN  6.1.3
Distance.cpp
Go to the documentation of this file.
1 /*
2  * this program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2006-2009 Christian Gehl
8  * Written (W) 2006-2009 Soeren Sonnenburg
9  * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/base/Parallel.h>
13 #include <shogun/base/Parameter.h>
14 #include <shogun/base/progress.h>
15 #include <shogun/io/File.h>
16 #include <shogun/io/SGIO.h>
17 #include <shogun/lib/Signal.h>
18 #include <shogun/lib/Time.h>
19 #include <shogun/lib/common.h>
20 #include <shogun/lib/config.h>
21 
24 
25 #include <string.h>
26 #ifndef _WIN32
27 #include <unistd.h>
28 #endif
29 
30 #ifdef HAVE_OPENMP
31 #include <omp.h>
32 
33 #endif
34 
35 using namespace shogun;
36 
// NOTE(review): the listing dropped original line 37, which held the header of
// this definition; presumably the CDistance default constructor - confirm
// against the repository. The visible body only calls the no-argument init().
38 {
39  init();
40 }
41 
42 
// NOTE(review): the listing dropped original line 43, which held the header of
// this definition; presumably the constructor taking (p_lhs, p_rhs) - confirm
// against the repository.
44 {
// reset all members to their defaults first, then attach the given features
45  init();
46  init(p_lhs, p_rhs);
47 }
48 
// Destructor body. The cross-reference index at the end of this page places
// ~CDistance() at Distance.cpp:49; that header line is missing from the listing.
50 {
// release the cached distance values, if any
51  SG_FREE(precomputed_matrix);
52  precomputed_matrix=NULL;
53 
// NOTE(review): original line 54 is missing from the listing; presumably it
// releases lhs/rhs (e.g. via remove_lhs_and_rhs()) - confirm against the
// repository.
55 }
56 
// Attaches a new pair of feature objects to this distance.
//
// l becomes lhs and r becomes rhs after check_compatibility(l, r) has passed;
// any cached precomputed matrix is freed. Always returns true when it returns
// at all (a failed REQUIRE does not return normally).
57 bool CDistance::init(CFeatures* l, CFeatures* r)
58 {
59  REQUIRE(check_compatibility(l, r), "Features are not compatible!\n");
60 
61  //increase reference counts
62  SG_REF(l);
63  SG_REF(r);
64 
65  //remove references to previous features
// NOTE(review): original line 66 is missing from the listing; presumably the
// call that releases the previous lhs/rhs (remove_lhs_and_rhs()) - confirm.
67 
68  lhs=l;
69  rhs=r;
70 
// NOTE(review): original lines 71-72 are missing from the listing; presumably
// they refresh num_lhs / num_rhs from the new features - confirm.
73 
// the cached distances belong to the old features, so drop them
74  SG_FREE(precomputed_matrix);
75  precomputed_matrix=NULL ;
76 
77  return true;
78 }
79 
// Body of check_compatibility(CFeatures* l, CFeatures* r); the cross-reference
// index at the end of this page places its header at Distance.cpp:80, a line
// that is missing from the listing.
//
// Requires both feature objects to be non-null, then runs further checks whose
// opening lines are missing from the listing (see notes below). Returns true
// when every check passes.
81 {
82  REQUIRE(l, "Left hand side features must be set!\n");
83  REQUIRE(r, "Right hand side features must be set!\n");
84 
// NOTE(review): original line 85 (the opening of this REQUIRE) is missing;
// judging by the message it presumably compares the feature types - confirm.
86  "Right hand side of features (%s) must be of same type with left hand side features (%s)\n",
87  r->get_name(), l->get_name());
88 
89  if (l->support_compatible_class())
90  {
// NOTE(review): original line 91 (REQUIRE opening) is missing; presumably a
// class-compatibility test driven by l - confirm.
92  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
93  r->get_name(), l->get_name());
94  }
95  else if (r->support_compatible_class())
96  {
// NOTE(review): original line 97 (REQUIRE opening) is missing; presumably a
// class-compatibility test driven by r - confirm.
98  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
99  r->get_name(), l->get_name());
100  }
101  else
102  {
// NOTE(review): original line 103 (REQUIRE opening) is missing; presumably a
// direct feature-class comparison - confirm.
104  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
105  r->get_name(), l->get_name());
106  }
107 
108  return true;
109 }
110 
// Load hook taking a CFile. No visible line uses `loader`.
111 void CDistance::load(CFile* loader)
112 {
// NOTE(review): original lines 113-114 are missing from the listing;
// presumably SG_SET_LOCALE_C / SG_RESET_LOCALE (both appear in the
// cross-reference index) - confirm against the repository.
115 }
116 
// Save hook taking a CFile. No visible line uses `writer`.
117 void CDistance::save(CFile* writer)
118 {
// NOTE(review): original lines 119-120 are missing from the listing;
// presumably SG_SET_LOCALE_C / SG_RESET_LOCALE (both appear in the
// cross-reference index) - confirm against the repository.
121 }
122 
124 {
125  SG_UNREF(rhs);
126  rhs = NULL;
127  num_rhs=0;
128 
129  SG_UNREF(lhs);
130  lhs = NULL;
131  num_lhs=0;
132 }
133 
135 {
136  SG_UNREF(lhs);
137  lhs = NULL;
138  num_lhs=0;
139 }
140 
143 {
144  SG_UNREF(rhs);
145  rhs = NULL;
146  num_rhs=0;
147 }
148 
// Body of replace_rhs(CFeatures* r); the cross-reference index at the end of
// this page places its header at Distance.cpp:149, a line that is missing from
// the listing.
//
// Swaps in r as the new rhs after checking it against the current lhs, frees
// the cached precomputed matrix and returns the previous rhs pointer. No
// SG_REF / SG_UNREF appears in the visible lines.
150 {
151  //make sure features are compatible
152  REQUIRE(check_compatibility(lhs, r), "Features are not compatible!\n");
153 
154  //remember the previous rhs so it can be handed back to the caller
155  CFeatures* tmp=rhs;
156 
157  rhs=r;
// NOTE(review): original line 158 is missing from the listing; presumably it
// refreshes num_rhs from r - confirm against the repository.
159 
// cached distances were computed against the old rhs
160  SG_FREE(precomputed_matrix);
161  precomputed_matrix=NULL ;
162 
163  // return old features including reference count
164  return tmp;
165 }
166 
// Body of replace_lhs(CFeatures* l); the cross-reference index at the end of
// this page places its header at Distance.cpp:167, a line that is missing from
// the listing.
//
// Swaps in l as the new lhs after checking it against the current rhs, frees
// the cached precomputed matrix and returns the previous lhs pointer. No
// SG_REF / SG_UNREF appears in the visible lines.
168 {
169  //make sure features are compatible
170  REQUIRE(check_compatibility(l, rhs), "Features are not compatible!\n");
171 
172  //remember the previous lhs so it can be handed back to the caller
173  CFeatures* tmp=lhs;
174 
175  lhs=l;
// NOTE(review): original line 176 is missing from the listing; presumably it
// refreshes num_lhs from l - confirm against the repository.
177 
// cached distances were computed against the old lhs
178  SG_FREE(precomputed_matrix);
179  precomputed_matrix=NULL ;
180 
181  // return old features including reference count
182  return tmp;
183 }
184 
185 float64_t CDistance::distance(int32_t idx_a, int32_t idx_b)
186 {
187  REQUIRE(idx_a < lhs->get_num_vectors() && idx_b < rhs->get_num_vectors() && \
188  idx_a >= 0 && idx_b >= 0,
189  "idx_a (%d) must be in [0,%d] and idx_b (%d) must be in [0,%d]\n",
190  idx_a, lhs->get_num_vectors()-1, idx_b, rhs->get_num_vectors()-1)
191 
192  ASSERT(lhs)
193  ASSERT(rhs)
194 
195  if (lhs==rhs)
196  {
197  int32_t num_vectors = lhs->get_num_vectors();
198 
199  if (idx_a>=num_vectors)
200  idx_a=2*num_vectors-1-idx_a;
201 
202  if (idx_b>=num_vectors)
203  idx_b=2*num_vectors-1-idx_b;
204  }
205 
206 
207  if (precompute_matrix && (precomputed_matrix==NULL) && (lhs==rhs))
209 
210  if (precompute_matrix && (precomputed_matrix!=NULL))
211  {
212  if (idx_a>=idx_b)
213  return precomputed_matrix[idx_a*(idx_a+1)/2+idx_b] ;
214  else
215  return precomputed_matrix[idx_b*(idx_b+1)/2+idx_a] ;
216  }
217 
218  return compute(idx_a, idx_b);
219 }
220 
221 void CDistance::run_distance_rhs(SGVector<float64_t>& result, const index_t idx_r_start, index_t idx_start, const index_t idx_stop, const index_t idx_a)
222 {
223  for(index_t i=idx_r_start; idx_start < idx_stop; ++i,++idx_start)
224  result.vector[i] = this->distance(idx_a,idx_start);
225 }
226 
227 void CDistance::run_distance_lhs(SGVector<float64_t>& result, const index_t idx_r_start, index_t idx_start, const index_t idx_stop, const index_t idx_b)
228 {
229  for(index_t i=idx_r_start; idx_start < idx_stop; ++i,++idx_start)
230  result.vector[i] = this->distance(idx_start,idx_b);
231 }
232 
234 {
235  int32_t num_left=lhs->get_num_vectors();
236  int32_t num_right=rhs->get_num_vectors();
237  SG_INFO("precomputing distance matrix (%ix%i)\n", num_left, num_right)
238 
239  ASSERT(num_left==num_right)
240  ASSERT(lhs==rhs)
241  int32_t num=num_left;
242 
243  SG_FREE(precomputed_matrix);
244  precomputed_matrix=SG_MALLOC(float32_t, num*(num+1)/2);
245 
246  for (auto i : progress(range(num), *this->io))
247  {
248  for (int32_t j=0; j<=i; j++)
249  precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
250  }
251 }
252 
253 void CDistance::init()
254 {
255  precomputed_matrix = NULL;
256  precompute_matrix = false;
257  lhs = NULL;
258  rhs = NULL;
259  num_lhs=0;
260  num_rhs=0;
261 
262  m_parameters->add((CSGObject**) &lhs, "lhs",
263  "Feature vectors to occur on left hand side.");
264  m_parameters->add((CSGObject**) &rhs, "rhs",
265  "Feature vectors to occur on right hand side.");
266 }
267 
// Computes the full distance matrix between lhs and rhs as an SGMatrix<T>.
//
// NOTE(review): the header (original line 269) is missing from the listing;
// judging by the explicit instantiations that follow this definition it is
// presumably `SGMatrix<T> CDistance::get_distance_matrix()` - confirm.
//
// Allocates m*n elements of T, fills entry (i,j) at result[i+j*m] with
// distance(i,j) and returns the buffer wrapped as an m x n SGMatrix<T>.
// When lhs==rhs and m==n only the entries with j>=i are computed; each value
// is mirrored into (j,i).
268 template <class T>
270 {
271  T* result = NULL;
272 
273  REQUIRE(has_features(), "no features assigned to distance\n")
274 
275  int32_t m=get_num_vec_lhs();
276  int32_t n=get_num_vec_rhs();
277 
// total_num is the number of matrix entries; total is only ever incremented in
// the visible code and never read.
278  int64_t total_num = int64_t(m)*n;
279  int64_t total=0;
280 
281  // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
282  bool symmetric= (lhs && lhs==rhs && m==n);
283 
284  SG_DEBUG("returning distance matrix of size %dx%d\n", m, n)
285 
286  result=SG_MALLOC(T, total_num);
287 
// NOTE(review): original line 288 is missing from the listing; the line below
// is the tail of a progress(...) call whose result is used as `pb` further
// down, so it presumably read `auto pb = progress(` - confirm.
289  range(total_num), *this->io, "PROGRESS: ", UTF8, []() { return true; });
290  int32_t num_threads;
291  int64_t step;
292  #pragma omp parallel shared(num_threads, step)
293  {
294 #ifdef HAVE_OPENMP
// one thread records the team size and the per-thread share of entries;
// num_threads is then decremented so it equals the index of the last thread
295  #pragma omp single
296  {
297  num_threads=omp_get_num_threads();
298  step=total_num/num_threads;
299  num_threads--;
300  }
301  int32_t thread_num=omp_get_thread_num();
302 #else
// without OpenMP: a single "thread" 0 handles all entries
303  num_threads=0;
304  step=total_num;
305  int32_t thread_num=0;
306 #endif
// only thread 0 counts entries and reports progress
307  bool verbose=(thread_num == 0);
308 
// row range [start, end) for this thread; the last thread always runs up to m.
// compute_row_start() is declared in Distance.h and not visible here -
// presumably it maps an entry offset to the row containing it; confirm.
309  int32_t start=compute_row_start(thread_num*step, n, symmetric);
310  int32_t end=(thread_num==num_threads) ? m : compute_row_start((thread_num+1)*step, n, symmetric);
311 
312  for (int32_t i=start; i<end; i++)
313  {
314  int32_t j_start=0;
315 
// symmetric case: start at the diagonal, the other half is mirrored below
316  if (symmetric)
317  j_start=i;
318 
319  for (int32_t j=j_start; j<n; j++)
320  {
321  float64_t v=this->distance(i,j);
// NOTE(review): i, j and m are int32_t, so i+j*m is evaluated in 32 bit even
// though total_num is 64 bit - looks like it can overflow for very large
// matrices; confirm and widen if so.
322  result[i+j*m]=v;
323 
324  if (symmetric && i!=j)
325  result[j+i*m]=v;
326 
327  if (verbose)
328  {
329  total++;
330 
331  if (symmetric && i!=j)
332  total++;
333 
334  pb.print_progress();
335 
336  // TODO: replace with new signal
337  // if (CSignal::cancel_computations())
338  // break;
339  }
340  }
341  }
342  }
343  pb.complete();
344 
345  return SGMatrix<T>(result,m,n,true);
346 }
347 
// Explicit instantiations of get_distance_matrix for the two element types
// float64_t and float32_t.
348 template SGMatrix<float64_t> CDistance::get_distance_matrix<float64_t>();
349 template SGMatrix<float32_t> CDistance::get_distance_matrix<float32_t>();
virtual const char * get_name() const =0
virtual bool support_compatible_class() const
Definition: Features.h:340
#define SG_INFO(...)
Definition: SGIO.h:117
#define SG_RESET_LOCALE
Definition: SGIO.h:85
void do_precompute_matrix()
matrix precomputation
Definition: Distance.cpp:233
virtual bool has_features()
Definition: Distance.h:324
virtual bool get_feature_class_compatibility(EFeatureClass rhs) const
Definition: Features.cpp:355
int32_t index_t
Definition: common.h:72
PRange< T > progress(Range< T > range, const SGIO &io, std::string prefix="PROGRESS: ", SG_PRG_MODE mode=UTF8, std::function< bool()> condition=[](){return true;})
Definition: progress.h:712
virtual int32_t get_num_vec_lhs()
Definition: Distance.h:306
virtual CFeatures * replace_lhs(CFeatures *lhs)
Definition: Distance.cpp:167
virtual void remove_lhs()
takes all necessary steps if the lhs is removed from distance matrix
Definition: Distance.cpp:134
int32_t num_rhs
Definition: Distance.h:404
virtual int32_t get_num_vectors() const =0
virtual ~CDistance()
Definition: Distance.cpp:49
#define REQUIRE(x,...)
Definition: SGIO.h:181
Parameter * m_parameters
Definition: SGObject.h:609
void run_distance_rhs(SGVector< float64_t > &result, const index_t idx_r_start, index_t idx_start, const index_t idx_stop, const index_t idx_a)
Definition: Distance.cpp:221
Range< T > range(T rend)
Definition: range.h:136
#define SG_REF(x)
Definition: SGObject.h:52
#define SG_SET_LOCALE_C
Definition: SGIO.h:84
virtual bool check_compatibility(CFeatures *l, CFeatures *r)
Definition: Distance.cpp:80
void complete() const
Definition: progress.h:652
void add(bool *param, const char *name, const char *description="")
Definition: Parameter.cpp:38
#define ASSERT(x)
Definition: SGIO.h:176
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:124
virtual void remove_lhs_and_rhs()
Definition: Distance.cpp:123
double float64_t
Definition: common.h:60
void save(CFile *writer)
Definition: Distance.cpp:117
A File access base class.
Definition: File.h:34
void load(CFile *loader)
Definition: Distance.cpp:111
virtual EFeatureClass get_feature_class() const =0
void run_distance_lhs(SGVector< float64_t > &result, const index_t idx_r_start, index_t idx_start, const index_t idx_stop, const index_t idx_b)
Definition: Distance.cpp:227
virtual int32_t get_num_vec_rhs()
Definition: Distance.h:315
void print_progress() const
Definition: progress.h:617
int32_t num_lhs
Definition: Distance.h:402
virtual CFeatures * replace_rhs(CFeatures *rhs)
Definition: Distance.cpp:149
float float32_t
Definition: common.h:59
shogun matrix
int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
Definition: Distance.h:173
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
Definition: Distance.cpp:185
#define SG_UNREF(x)
Definition: SGObject.h:53
#define SG_DEBUG(...)
Definition: SGIO.h:106
bool precompute_matrix
Definition: Distance.h:394
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
CFeatures * lhs
feature vectors to occur on the left hand side
Definition: Distance.h:397
The class Features is the base class of all feature objects.
Definition: Features.h:69
CFeatures * rhs
feature vectors to occur on the right hand side
Definition: Distance.h:399
SGMatrix< float64_t > get_distance_matrix()
Definition: Distance.h:156
float32_t * precomputed_matrix
Definition: Distance.h:389
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Distance.cpp:57
virtual void remove_rhs()
takes all necessary steps if the rhs is removed from distance matrix
Definition: Distance.cpp:142
virtual float64_t compute(int32_t idx_a, int32_t idx_b)=0
virtual EFeatureType get_feature_type() const =0

SHOGUN Machine Learning Toolbox - Documentation