SHOGUN  6.1.3
DenseFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2010 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Written (W) 2011-2013 Heiko Strathmann
10  * Written (W) 2014-2017 Soumyajit De
11  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
12  * Copyright (C) 2010 Berlin Institute of Technology
13  */
14 
15 #include <shogun/base/some.h>
18 #include <shogun/io/SGIO.h>
19 #include <shogun/base/Parameter.h>
23 #include <algorithm>
24 #include <string.h>
25 
26 namespace shogun {
27 
28 template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatures(size)
29 {
30  init();
31 }
32 
// Copy constructor: forwards `orig` to the CDotFeatures base and runs init().
// NOTE(review): listing lines 37-38 and 42-44 are absent from this extract, so
// what is copied from `orig` and what happens inside the subset-stack branch
// is not visible here - confirm against the upstream file.
33 template<class ST> CDenseFeatures<ST>::CDenseFeatures(const CDenseFeatures & orig) :
34  CDotFeatures(orig)
35 {
36  init();
39 
// Only entered when the source object carries a subset stack.
40  if (orig.m_subset_stack != NULL)
41  {
45  }
46 }
47 
// Constructor taking `matrix`: default-constructs the CDotFeatures base, runs
// init() and installs `matrix` through set_feature_matrix().
// NOTE(review): the signature (listing line 48) is absent from this extract;
// presumably it takes an SGMatrix<ST> named `matrix` - confirm.
49  CDotFeatures()
50 {
51  init();
52  set_feature_matrix(matrix);
53 }
54 
55 template<class ST> CDenseFeatures<ST>::CDenseFeatures(ST* src, int32_t num_feat, int32_t num_vec) :
56  CDotFeatures()
57 {
58  init();
59  set_feature_matrix(SGMatrix<ST>(src, num_feat, num_vec));
60 }
61 template<class ST> CDenseFeatures<ST>::CDenseFeatures(CFile* loader) :
62  CDotFeatures()
63 {
64  init();
65  load(loader);
66 }
67 
68 template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const
69 {
70  return new CDenseFeatures<ST>(*this);
71 }
72 
// Body that only calls free_features().
// NOTE(review): the signature (listing line 73) is absent from this extract;
// presumably this is the destructor - confirm against the upstream file.
74 {
75  free_features();
76 }
77 
// Release the resources held by this feature object.
// NOTE(review): the whole body (listing lines 80-81) is absent from this
// extract, so what exactly is released cannot be stated from here.
78 template<class ST> void CDenseFeatures<ST>::free_features()
79 {
82 }
83 
// Reset the vector and feature counts to zero.
// NOTE(review): listing lines 86-87 are absent from this extract; presumably
// they drop the feature matrix itself - confirm.
84 template<class ST> void CDenseFeatures<ST>::free_feature_matrix()
85 {
88  num_vectors = 0;
89  num_features = 0;
90 }
91 
// Return a pointer to feature vector `num`.
//   num    - vector index; converted through the subset stack for array/cache access
//   len    - receives the vector length (num_features, or the length after preprocessing)
//   dofree - receives true only when no cache slot was available for the
//            computed vector; free_feature_vector() SG_FREEs the pointer in that case
// NOTE(review): listing lines 99 and 130-133 are absent from this extract.
92 template<class ST> ST* CDenseFeatures<ST>::get_feature_vector(int32_t num, int32_t& len, bool& dofree)
93 {
94  /* index conversion for subset, only for array access */
95  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
96 
97  len = num_features;
98 
// NOTE(review): the condition guarding this block (listing line 99) is missing;
// the block returns a pointer straight into feature_matrix, so presumably it
// tests for an in-memory matrix - confirm.
100  {
101  dofree = false;
// 64-bit multiplication keeps the column offset from overflowing int32.
102  return &feature_matrix.matrix[real_num * int64_t(num_features)];
103  }
104 
105  ST* feat = NULL;
106  dofree = false;
107 
108  if (feature_cache)
109  {
110  feat = feature_cache->lock_entry(real_num);
111 
// Cache hit: hand the locked entry back directly.
112  if (feat)
113  return feat;
114  else
115  feat = feature_cache->set_entry(real_num);
116  }
117 
// Only the dofree assignment is conditional; compute_feature_vector() always runs
// and receives the unconverted index `num`.
118  if (!feat)
119  dofree = true;
120  feat = compute_feature_vector(num, len, feat);
121 
122  if (get_num_preprocessors())
123  {
124  int32_t tmp_len = len;
125  ST* tmp_feat_before = feat;
126  ST* tmp_feat_after = NULL;
127 
128  for (int32_t i = 0; i < get_num_preprocessors(); i++)
129  {
// NOTE(review): the declarations of `p` and `applied` (listing lines 130-133)
// are missing from this extract.
132  // temporary hack
134  SGVector<ST>(tmp_feat_before, tmp_len));
135  tmp_feat_after = applied.vector;
136  SG_UNREF(p);
137 
138  if (i != 0) // delete feature vector, except for the first one, i.e., feat
139  SG_FREE(tmp_feat_before);
140  tmp_feat_before = tmp_feat_after;
141  }
142 
143  // note: tmp_feat_after should be checked as it is used by memcpy
144  if (tmp_feat_after)
145  {
// Copy the final preprocessed data back into `feat` and release the temporary.
146  sg_memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len);
147  SG_FREE(tmp_feat_after);
148 
149  len = tmp_len;
150  }
151  }
152  return feat;
153 }
154 
155 template<class ST> void CDenseFeatures<ST>::set_feature_vector(SGVector<ST> vector, int32_t num)
156 {
157  /* index conversion for subset, only for array access */
158  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
159 
160  if (num>=get_num_vectors())
161  {
162  SG_ERROR("Index out of bounds (number of vectors %d, you "
163  "requested %d)\n", get_num_vectors(), num);
164  }
165 
166  if (!feature_matrix.matrix)
167  SG_ERROR("Requires a in-memory feature matrix\n")
168 
169  if (vector.vlen != num_features)
170  SG_ERROR(
171  "Vector not of length %d (has %d)\n", num_features, vector.vlen);
172 
173  sg_memcpy(&feature_matrix.matrix[real_num * int64_t(num_features)], vector.vector,
174  int64_t(num_features) * sizeof(ST));
175 }
176 
// Return feature vector `num` wrapped in an SGVector<ST>; the pointer, length and
// do_free flag come from get_feature_vector(num, vlen, do_free) and are passed
// unchanged to the SGVector constructor.
// NOTE(review): the signature (listing line 177) is absent from this extract.
178 {
179  /* index conversion for subset, only for array access */
180  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
181 
182  if (num >= get_num_vectors())
183  {
// NOTE(review): the check is on `num`, but the message reports `real_num`
// (set_feature_vector reports `num`); the conversion above also runs before
// this bounds check - confirm whether both are intended.
184  SG_ERROR("Index out of bounds (number of vectors %d, you "
185  "requested %d)\n", get_num_vectors(), real_num);
186  }
187 
188  int32_t vlen;
189  bool do_free;
190  ST* vector= get_feature_vector(num, vlen, do_free);
191  return SGVector<ST>(vector, vlen, do_free);
192 }
193 
// Release a vector obtained from get_feature_vector().
//   feat_vec - pointer that was returned
//   num      - index of the vector
//   dofree   - when true the pointer is released with SG_FREE
194 template<class ST> void CDenseFeatures<ST>::free_feature_vector(ST* feat_vec, int32_t num, bool dofree)
195 {
// NOTE(review): the statement controlled by this `if` (listing line 197) is
// missing from the extract; presumably it unlocks the cache entry for `num`
// (the reference index lists unlock_entry) - confirm.
196  if (feature_cache)
198 
199  if (dofree)
200  SG_FREE(feat_vec);
201 }
202 
203 template<class ST> void CDenseFeatures<ST>::free_feature_vector(SGVector<ST> vec, int32_t num)
204 {
205  free_feature_vector(vec.vector, num, false);
206  vec=SGVector<ST>();
207 }
208 
// Keep only the vectors listed in idx[0..idx_len), compacting them in place to the
// front of feature_matrix and setting num_vectors to idx_len.
//   idx     - indices of the vectors to keep; ASSERTed to be increasing
//   idx_len - number of entries in idx; must not exceed num_vectors
// NOTE(review): listing lines 211 and 214 are absent from this extract; the first
// is the condition for the "A subset is set" error.
209 template<class ST> void CDenseFeatures<ST>::vector_subset(int32_t* idx, int32_t idx_len)
210 {
212  SG_ERROR("A subset is set, cannot call vector_subset\n")
213 
215  ASSERT(idx_len<=num_vectors)
216 
// Remember the old count for range checks; the member is overwritten right away.
217  int32_t num_vec = num_vectors;
218  num_vectors = idx_len;
219 
220  int32_t old_ii = -1;
221 
222  for (int32_t i = 0; i < idx_len; i++)
223  {
224  int32_t ii = idx[i];
225  ASSERT(old_ii<ii)
226 
227  if (ii < 0 || ii >= num_vec)
228  SG_ERROR("Index out of range: should be 0<%d<%d\n", ii, num_vec)
229 
// Vector already sits at its target position.
// NOTE(review): this path skips the `old_ii = ii` update below, so the ordering
// ASSERT is not tightened for such entries - confirm whether that is intended.
230  if (i == ii)
231  continue;
232 
233  sg_memcpy(&feature_matrix.matrix[int64_t(num_features) * i],
234  &feature_matrix.matrix[int64_t(num_features) * ii],
235  num_features * sizeof(ST));
236  old_ii = ii;
237  }
238 }
239 
// Keep only the feature dimensions listed in idx[0..idx_len), rewriting every
// vector in place with the new, shorter stride and setting num_features to idx_len.
//   idx     - indices of the features to keep; ASSERTed to be strictly increasing
//   idx_len - number of entries in idx; must not exceed num_features
// NOTE(review): listing lines 242 and 245 are absent from this extract; the first
// is the condition for the "A subset is set" error.
240 template<class ST> void CDenseFeatures<ST>::feature_subset(int32_t* idx, int32_t idx_len)
241 {
243  SG_ERROR("A subset is set, cannot call feature_subset\n")
244 
246  ASSERT(idx_len<=num_features)
247  int32_t num_feat = num_features;
248  num_features = idx_len;
249 
250  for (int32_t i = 0; i < num_vectors; i++)
251  {
// src uses the old stride (num_feat), dst the new one (num_features).
252  ST* src = &feature_matrix.matrix[int64_t(num_feat) * i];
253  ST* dst = &feature_matrix.matrix[int64_t(num_features) * i];
254 
255  int32_t old_jj = -1;
256  for (int32_t j = 0; j < idx_len; j++)
257  {
258  int32_t jj = idx[j];
259  ASSERT(old_jj<jj)
260  if (jj < 0 || jj >= num_feat)
261  SG_ERROR(
262  "Index out of range: should be 0<%d<%d\n", jj, num_feat);
263 
264  dst[j] = src[jj];
265  old_jj = jj;
266  }
267  }
268 }
269 
// Return the feature matrix: the stored matrix itself when no subset is active,
// otherwise `target` after copy_feature_matrix(target) has filled it.
// NOTE(review): listing lines 271 (signature) and 276 (declaration of `target`)
// are absent from this extract.
270 template <class ST>
272 {
273  if (!m_subset_stack->has_subsets())
274  return feature_matrix;
275 
277  copy_feature_matrix(target);
278  return target;
279 }
280 
// Copy the currently visible feature vectors into `target`, starting at column
// `column_offset`.
// Requirements checked below: non-negative offset, target distinct from
// feature_matrix, target allocated, target.num_rows == num_features and
// target.num_cols >= get_num_vectors() + column_offset.
// NOTE(review): the signature (listing line 282) is absent from this extract.
281 template <class ST>
283 {
284  REQUIRE(column_offset>=0, "Column offset (%d) cannot be negative!\n", column_offset);
285  REQUIRE(!target.equals(feature_matrix), "Source and target feature matrices cannot be the same\n");
286 
287  index_t num_vecs=get_num_vectors();
288  index_t num_cols=num_vecs+column_offset;
289 
290  REQUIRE(target.matrix!=nullptr, "Provided matrix is not allocated!\n");
291  REQUIRE(target.num_rows==num_features,
292  "Number of rows of given matrix (%d) should be equal to the number of features (%d)!\n",
293  target.num_rows, num_features);
294  REQUIRE(target.num_cols>=num_cols,
295  "Number of cols of given matrix (%d) should be at least %d!\n",
296  target.num_cols, num_cols);
297 
298  if (!m_subset_stack->has_subsets())
299  {
// No subset: the whole stored matrix is copied with a single memcpy.
300  auto src=feature_matrix.matrix;
301  auto dest=target.matrix+int64_t(num_features)*column_offset;
302  sg_memcpy(dest, src, feature_matrix.size()*sizeof(ST));
303  }
304  else
305  {
// Subset active: copy column by column, mapping each index through the subset stack.
306  for (int32_t i=0; i<num_vecs; ++i)
307  {
308  auto real_i=m_subset_stack->subset_idx_conversion(i);
309  auto src=feature_matrix.matrix+real_i*int64_t(num_features);
310  auto dest=target.matrix+int64_t(num_features)*(column_offset+i);
311  sg_memcpy(dest, src, num_features*sizeof(ST));
312  }
313  }
314 }
315 
// Return a copy of the feature_matrix handle taken at the start of the body.
// NOTE(review): the signature (listing line 316) and listing lines 319-322 are
// absent from this extract; the reference index lists steal_feature_matrix(), so
// the missing lines presumably reset this object's state - confirm.
317 {
318  SGMatrix<ST> st_feature_matrix=feature_matrix;
323  return st_feature_matrix;
324 }
325 
// Install `matrix` as the feature matrix; num_features is taken from its row
// count and num_vectors from its column count.
// NOTE(review): the signature (listing line 326) and listing lines 328-329 are
// absent from this extract.
327 {
330  feature_matrix = matrix;
331  num_features = matrix.num_rows;
332  num_vectors = matrix.num_cols;
333 }
334 
335 template<class ST> ST* CDenseFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec)
336 {
337  num_feat = num_features;
338  num_vec = num_vectors;
339  return feature_matrix.matrix;
340 }
341 
// Build a new CDenseFeatures<ST> from the buffer and dimensions produced by
// get_transposed(num_feat, num_vec).
// NOTE(review): the signature (listing line 342) is absent from this extract.
343 {
344  int32_t num_feat;
345  int32_t num_vec;
346  ST* fm = get_transposed(num_feat, num_vec);
347 
348  return new CDenseFeatures<ST>(fm, num_feat, num_vec);
349 }
350 
// Allocate and fill a transposed copy of the feature data.
//   num_feat - receives get_num_vectors()
//   num_vec  - receives num_features
// Returns a buffer obtained with SG_MALLOC holding num_feat*num_vec entries.
351 template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec)
352 {
353  num_feat = get_num_vectors();
354  num_vec = num_features;
355 
356  int32_t old_num_vec=get_num_vectors();
357 
358  ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec);
359 
360  for (int32_t i=0; i<old_num_vec; i++)
361  {
// NOTE(review): the declaration of `vec` (listing line 362) is missing from the
// extract; presumably it fetches feature vector i - confirm.
363 
// Entry j of vector i goes to position (i, j) of the transposed layout.
364  for (int32_t j=0; j<vec.vlen; j++)
365  fm[j*int64_t(old_num_vec)+i]=vec.vector[j];
366 
367  free_feature_vector(vec, i);
368  }
369 
370  return fm;
371 }
372 
// Fill this object's feature matrix from `df`: a num_feat x num_vec matrix is
// allocated and every entry of each vector `v` is cast to ST and stored.
// NOTE(review): the signature (listing line 373) and listing lines 375, 382 and
// 387 (the latter declares `v`) are absent from this extract.
374 {
376 
377  int32_t num_feat = df->get_dim_feature_space();
378  int32_t num_vec = df->get_num_vectors();
379 
380  ASSERT(num_feat>0 && num_vec>0)
381 
383  feature_matrix = SGMatrix<ST>(num_feat, num_vec);
384 
385  for (int32_t i = 0; i < num_vec; i++)
386  {
388  ASSERT(num_feat==v.vlen)
389 
// Vector i occupies the num_feat consecutive entries starting at i*num_feat.
390  for (int32_t j = 0; j < num_feat; j++)
391  feature_matrix.matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
392  }
393  num_features = num_feat;
394  num_vectors = num_vec;
395 }
396 
// Apply the attached preprocessors to the feature matrix.
//   force_preprocessing - when true, preprocessors already marked as applied are run again
// Returns true when the loop over the preprocessors completes, false when one of
// them returns a NULL matrix or when the else-branch below is taken.
// NOTE(review): listing lines 399, 404 and 411-412 are absent from this extract:
// the subset check, the condition of the main `if`, and the declaration of `p`.
397 template<class ST> bool CDenseFeatures<ST>::apply_preprocessor(bool force_preprocessing)
398 {
400  SG_ERROR("A subset is set, cannot call apply_preproc\n")
401 
402  SG_DEBUG("force: %d\n", force_preprocessing)
403 
405  {
406  for (int32_t i = 0; i < get_num_preprocessors(); i++)
407  {
408  if ((!is_preprocessed(i) || force_preprocessing))
409  {
// The preprocessor is marked as applied before it actually runs.
410  set_preprocessed(i);
413  SG_INFO("preprocessing using preproc %s\n", p->get_name())
414 
415  if (p->apply_to_feature_matrix(this).matrix == NULL)
416  {
417  SG_UNREF(p);
418  return false;
419  }
420  SG_UNREF(p);
421 
422  }
423  }
424 
425  return true;
426  }
427  else
428  {
// Report which precondition failed: no matrix, or no preprocessors.
429  if (!feature_matrix.matrix)
430  SG_ERROR("no feature matrix\n")
431 
432  if (!get_num_preprocessors())
433  SG_ERROR("no preprocessors available\n")
434 
435  return false;
436  }
437 }
438 
// Return the number of feature vectors.
// NOTE(review): the return statement (listing line 441) is absent from this
// extract, so whether an active subset changes the result cannot be stated here.
439 template<class ST> int32_t CDenseFeatures<ST>::get_num_vectors() const
440 {
442 }
443 
444 template<class ST> int32_t CDenseFeatures<ST>::get_num_features() const { return num_features; }
445 
// Set the num_features member to `num`.
// NOTE(review): listing line 449 is absent from this extract, so any follow-up
// action after the assignment is not visible here.
446 template<class ST> void CDenseFeatures<ST>::set_num_features(int32_t num)
447 {
448  num_features = num;
450 }
451 
// Set the num_vectors member to `num`.
// NOTE(review): listing lines 454 and 458 are absent from this extract: the
// condition for the "A subset is set" error, and whatever follows the assignment.
452 template<class ST> void CDenseFeatures<ST>::set_num_vectors(int32_t num)
453 {
455  SG_ERROR("A subset is set, cannot call set_num_vectors\n")
456 
457  num_vectors = num;
459 }
460 
// Set up the feature cache; work is only done when both num_features and
// num_vectors are non-zero.
// NOTE(review): listing lines 463, 468-469 and 471 are absent from this extract:
// the subset check and most of the cache construction.
461 template<class ST> void CDenseFeatures<ST>::initialize_cache()
462 {
464  SG_ERROR("A subset is set, cannot call initialize_cache\n")
465 
466  if (num_features && num_vectors)
467  {
470  num_vectors);
472  }
473 }
474 
475 template<class ST> EFeatureClass CDenseFeatures<ST>::get_feature_class() const { return C_DENSE; }
476 
// Change the logical dimensions without touching the data.
//   p_num_features, p_num_vectors - requested new dimensions
// Returns true and updates both members when the new product equals the current
// one, false otherwise.
// NOTE(review): listing line 479 (the condition for the subset error) is absent
// from this extract.
477 template<class ST> bool CDenseFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors)
478 {
480  SG_ERROR("A subset is set, cannot call reshape\n")
481 
// NOTE(review): both products are formed from int32_t operands without the
// int64_t cast used elsewhere in this file - confirm overflow cannot occur.
482  if (p_num_features * p_num_vectors
483  == this->num_features * this->num_vectors)
484  {
485  num_features = p_num_features;
486  num_vectors = p_num_vectors;
487  return true;
488  } else
489  return false;
490 }
491 
492 template<class ST> int32_t CDenseFeatures<ST>::get_dim_feature_space() const { return num_features; }
493 
// Dot product between vector `vec_idx1` of this object and vector `vec_idx2` of `df`.
// Both vectors are fetched, wrapped in SGVector views, passed to linalg::dot and
// then released through their owning feature objects.
// NOTE(review): listing lines 498-500 are absent from this extract; they must
// declare `sf`, presumably `df` cast to CDenseFeatures<ST>* - confirm.
494 template<class ST> float64_t CDenseFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df,
495  int32_t vec_idx2)
496 {
497  ASSERT(df)
501 
502  int32_t len1, len2;
503  bool free1, free2;
504 
505  ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
506  ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);
507  SGVector<ST> sg_vec1(vec1, len1, false);
508  SGVector<ST> sg_vec2(vec2, len2, false);
509 
510  float64_t result = linalg::dot(sg_vec1, sg_vec2);
511 
512  free_feature_vector(vec1, vec_idx1, free1);
513  sf->free_feature_vector(vec2, vec_idx2, free2);
514 
515  return result;
516 }
517 
518 template<class ST> void CDenseFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
519  float64_t* vec2, int32_t vec2_len, bool abs_val)
520 {
521  ASSERT(vec2_len == num_features)
522 
523  int32_t vlen;
524  bool vfree;
525  ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
526 
527  ASSERT(vlen == num_features)
528 
529  if (abs_val)
530  {
531  for (int32_t i = 0; i < num_features; i++)
532  vec2[i] += alpha * CMath::abs(vec1[i]);
533  }
534  else
535  {
536  for (int32_t i = 0; i < num_features; i++)
537  vec2[i] += alpha * vec1[i];
538  }
539 
540  free_feature_vector(vec1, vec_idx1, vfree);
541 }
542 
// Full specialization of add_to_dense_vec; the local `float64_t* vec1` suggests
// it is the float64_t one.
// NOTE(review): listing line 544 (first signature line) and listing line 562
// (the whole non-abs branch) are absent from this extract; the reference index
// lists vec1_plus_scalar_times_vec2, presumably what the empty branch calls - confirm.
543 template<>
545  float64_t* vec2, int32_t vec2_len, bool abs_val)
546 {
547  ASSERT(vec2_len == num_features)
548 
549  int32_t vlen;
550  bool vfree;
551  float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
552 
553  ASSERT(vlen == num_features)
554 
555  if (abs_val)
556  {
557  for (int32_t i = 0; i < num_features; i++)
558  vec2[i] += alpha * CMath::abs(vec1[i]);
559  }
560  else
561  {
563  }
564 
565  free_feature_vector(vec1, vec_idx1, vfree);
566 }
567 
568 template<class ST> int32_t CDenseFeatures<ST>::get_nnz_features_for_vector(int32_t num)
569 {
570  return num_features;
571 }
572 
573 template<class ST> void* CDenseFeatures<ST>::get_feature_iterator(int32_t vector_index)
574 {
575  if (vector_index>=get_num_vectors())
576  {
577  SG_ERROR("Index out of bounds (number of vectors %d, you "
578  "requested %d)\n", get_num_vectors(), vector_index);
579  }
580 
581  dense_feature_iterator* iterator = SG_MALLOC(dense_feature_iterator, 1);
582  iterator->vec = get_feature_vector(vector_index, iterator->vlen,
583  iterator->vfree);
584  iterator->vidx = vector_index;
585  iterator->index = 0;
586  return iterator;
587 }
588 
589 template<class ST> bool CDenseFeatures<ST>::get_next_feature(int32_t& index, float64_t& value,
590  void* iterator)
591 {
592  dense_feature_iterator* it = (dense_feature_iterator*) iterator;
593  if (!it || it->index >= it->vlen)
594  return false;
595 
596  index = it->index++;
597  value = (float64_t) it->vec[index];
598 
599  return true;
600 }
601 
602 template<class ST> void CDenseFeatures<ST>::free_feature_iterator(void* iterator)
603 {
604  if (!iterator)
605  return;
606 
607  dense_feature_iterator* it = (dense_feature_iterator*) iterator;
608  free_feature_vector(it->vec, it->vidx, it->vfree);
609  SG_FREE(it);
610 }
611 
// Create a new CDenseFeatures holding copies of the vectors selected by `indices`.
// Each index is mapped through the subset stack before the column is copied.
// The result is SG_REF'ed before it is returned.
// NOTE(review): the signature (listing line 612) is absent from this extract.
613 {
614  SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen);
615 
616  for (index_t i=0; i<indices.vlen; ++i)
617  {
618  index_t real_idx=m_subset_stack->subset_idx_conversion(indices.vector[i]);
// NOTE(review): these offsets are computed in 32-bit arithmetic (index_t times
// num_features) without the int64_t cast used elsewhere - confirm they cannot overflow.
619  sg_memcpy(&feature_matrix_copy.matrix[i*num_features],
620  &feature_matrix.matrix[real_idx*num_features],
621  num_features*sizeof(ST));
622  }
623 
624  CFeatures* result=new CDenseFeatures(feature_matrix_copy);
625  SG_REF(result);
626  return result;
627 }
628 
// Create a new CDenseFeatures that keeps only the feature dimensions in `dims`
// for all get_num_vectors() vectors. Every entry of `dims` must lie in
// [0, num_features). The result is SG_REF'ed before it is returned.
// NOTE(review): listing lines 630 (signature) and 647 (declaration of `real_idx`)
// are absent from this extract.
629 template<class ST>
631 {
632  SG_DEBUG("Entering!\n");
633 
634  // sanity checks
635  index_t max=CMath::max(dims.vector, dims.vlen);
636  index_t min=CMath::min(dims.vector, dims.vlen);
637  REQUIRE(max<num_features && min>=0,
638  "Provided dimensions is in the range [%d, %d] but they "
639  "have to be within [0, %d]! But it \n", min, max, num_features);
640 
641  SGMatrix<ST> feature_matrix_copy(dims.vlen, get_num_vectors());
642 
643  for (index_t i=0; i<dims.vlen; ++i)
644  {
645  for (index_t j=0; j<get_num_vectors(); ++j)
646  {
648  feature_matrix_copy(i, j)=feature_matrix(dims[i], real_idx);
649  }
650  }
651 
652  CFeatures* result=new CDenseFeatures(feature_matrix_copy);
653  SG_REF(result);
654 
655  SG_DEBUG("Leaving!\n");
656  return result;
657 }
658 
// Create a new CDenseFeatures<ST> built from a copy of the feature_matrix handle
// and add the index vector of the last subset on the stack to it.
// The result is SG_REF'ed before it is returned.
// NOTE(review): listing lines 660 (signature) and 668 are absent from this
// extract; line 668 presumably guards the add_subset() call - confirm.
659 template<class ST>
661 {
662  CFeatures* shallow_copy_features=NULL;
663 
664  SG_SDEBUG("Using underlying feature matrix with %d dimensions and %d feature vectors!\n", num_features, num_vectors);
665  SGMatrix<ST> shallow_copy_matrix(feature_matrix);
666  shallow_copy_features=new CDenseFeatures<ST>(shallow_copy_matrix);
667  SG_REF(shallow_copy_features);
669  shallow_copy_features->add_subset(m_subset_stack->get_last_subset()->get_subset_idx());
670 
671  return shallow_copy_features;
672 }
673 
// Compute feature vector `num` on the fly. As visible here it sets `len` to 0 and
// returns NULL; `num` and `target` are not used in the visible lines.
// NOTE(review): listing line 677 is absent from this extract; the reference index
// lists SG_NOTIMPLEMENTED, presumably invoked there - confirm.
674 template<class ST> ST* CDenseFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len,
675  ST* target)
676 {
678  len = 0;
679  return NULL;
680 }
681 
// Bring all members to their default state and register the parameters:
// zero counts, no feature cache, generic type set to ST, and num_vectors,
// num_features and feature_matrix registered through SG_ADD.
// NOTE(review): listing line 687 is absent from this extract.
682 template<class ST> void CDenseFeatures<ST>::init()
683 {
684  num_vectors = 0;
685  num_features = 0;
686 
688  feature_cache = NULL;
689 
690  set_generic<ST>();
691 
692  /* not store number of vectors in subset */
693  SG_ADD(&num_vectors, "num_vectors", "Number of vectors.", MS_NOT_AVAILABLE);
694  SG_ADD(&num_features, "num_features", "Number of features.", MS_NOT_AVAILABLE);
695  SG_ADD(&feature_matrix, "feature_matrix",
696  "Matrix of feature vectors / 1 vector per column.", MS_NOT_AVAILABLE);
697 }
698 
// Helper macro: defines the get_feature_type() specialization of
// CDenseFeatures<sg_type> so that it returns f_type. The macro is undefined
// again once all specializations have been emitted.
// NOTE(review): listing lines 705-706 and 715-717 are absent from this extract;
// presumably they hold the invocations for the remaining instantiated types - confirm.
699 #define GET_FEATURE_TYPE(f_type, sg_type) \
700 template<> EFeatureType CDenseFeatures<sg_type>::get_feature_type() const \
701 { \
702  return f_type; \
703 }
704 
// Both uint8_t and int8_t map to F_BYTE.
707 GET_FEATURE_TYPE(F_BYTE, uint8_t)
708 GET_FEATURE_TYPE(F_BYTE, int8_t)
709 GET_FEATURE_TYPE(F_SHORT, int16_t)
710 GET_FEATURE_TYPE(F_WORD, uint16_t)
711 GET_FEATURE_TYPE(F_INT, int32_t)
712 GET_FEATURE_TYPE(F_UINT, uint32_t)
713 GET_FEATURE_TYPE(F_LONG, int64_t)
714 GET_FEATURE_TYPE(F_ULONG, uint64_t)
718 #undef GET_FEATURE_TYPE
719 
720 template<> float64_t CDenseFeatures<bool>::dense_dot(int32_t vec_idx1,
721  const float64_t* vec2, int32_t vec2_len)
722 {
723  ASSERT(vec2_len == num_features)
724 
725  int32_t vlen;
726  bool vfree;
727  bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
728 
729  ASSERT(vlen == num_features)
730  float64_t result = 0;
731 
732  for (int32_t i = 0; i < num_features; i++)
733  result += vec1[i] ? vec2[i] : 0;
734 
735  free_feature_vector(vec1, vec_idx1, vfree);
736 
737  return result;
738 }
739 
740 template<> float64_t CDenseFeatures<char>::dense_dot(int32_t vec_idx1,
741  const float64_t* vec2, int32_t vec2_len)
742 {
743  ASSERT(vec2_len == num_features)
744 
745  int32_t vlen;
746  bool vfree;
747  char* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
748 
749  ASSERT(vlen == num_features)
750  float64_t result = 0;
751 
752  for (int32_t i = 0; i < num_features; i++)
753  result += vec1[i] * vec2[i];
754 
755  free_feature_vector(vec1, vec_idx1, vfree);
756 
757  return result;
758 }
759 
760 template<> float64_t CDenseFeatures<int8_t>::dense_dot(int32_t vec_idx1,
761  const float64_t* vec2, int32_t vec2_len)
762 {
763  ASSERT(vec2_len == num_features)
764 
765  int32_t vlen;
766  bool vfree;
767  int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
768 
769  ASSERT(vlen == num_features)
770  float64_t result = 0;
771 
772  for (int32_t i = 0; i < num_features; i++)
773  result += vec1[i] * vec2[i];
774 
775  free_feature_vector(vec1, vec_idx1, vfree);
776 
777  return result;
778 }
779 
// Dense dot product: sums vec1[i] * vec2[i] over num_features entries into a
// float64_t and releases the feature vector afterwards.
// NOTE(review): the first signature line (listing line 780) is absent from this
// extract; the local `uint8_t* vec1` suggests the uint8_t specialization of
// dense_dot - confirm.
781  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
782 {
783  ASSERT(vec2_len == num_features)
784 
785  int32_t vlen;
786  bool vfree;
787  uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
788 
789  ASSERT(vlen == num_features)
790  float64_t result = 0;
791 
792  for (int32_t i = 0; i < num_features; i++)
793  result += vec1[i] * vec2[i];
794 
795  free_feature_vector(vec1, vec_idx1, vfree);
796 
797  return result;
798 }
799 
// Dense dot product: sums vec1[i] * vec2[i] over num_features entries into a
// float64_t and releases the feature vector afterwards.
// NOTE(review): the first signature line (listing line 800) is absent from this
// extract; the local `int16_t* vec1` suggests the int16_t specialization of
// dense_dot - confirm.
801  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
802 {
803  ASSERT(vec2_len == num_features)
804 
805  int32_t vlen;
806  bool vfree;
807  int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
808 
809  ASSERT(vlen == num_features)
810  float64_t result = 0;
811 
812  for (int32_t i = 0; i < num_features; i++)
813  result += vec1[i] * vec2[i];
814 
815  free_feature_vector(vec1, vec_idx1, vfree);
816 
817  return result;
818 }
819 
// Dense dot product: sums vec1[i] * vec2[i] over num_features entries into a
// float64_t and releases the feature vector afterwards.
// NOTE(review): the first signature line (listing line 820) is absent from this
// extract; the local `uint16_t* vec1` suggests the uint16_t specialization of
// dense_dot - confirm.
821  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
822 {
823  ASSERT(vec2_len == num_features)
824 
825  int32_t vlen;
826  bool vfree;
827  uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
828 
829  ASSERT(vlen == num_features)
830  float64_t result = 0;
831 
832  for (int32_t i = 0; i < num_features; i++)
833  result += vec1[i] * vec2[i];
834 
835  free_feature_vector(vec1, vec_idx1, vfree);
836 
837  return result;
838 }
839 
// Dense dot product: sums vec1[i] * vec2[i] over num_features entries into a
// float64_t and releases the feature vector afterwards.
// NOTE(review): the first signature line (listing line 840) is absent from this
// extract; the local `int32_t* vec1` suggests the int32_t specialization of
// dense_dot - confirm.
841  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
842 {
843  ASSERT(vec2_len == num_features)
844 
845  int32_t vlen;
846  bool vfree;
847  int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
848 
849  ASSERT(vlen == num_features)
850  float64_t result = 0;
851 
852  for (int32_t i = 0; i < num_features; i++)
853  result += vec1[i] * vec2[i];
854 
855  free_feature_vector(vec1, vec_idx1, vfree);
856 
857  return result;
858 }
859 
// Dense dot product: sums vec1[i] * vec2[i] over num_features entries into a
// float64_t and releases the feature vector afterwards.
// NOTE(review): the first signature line (listing line 860) is absent from this
// extract; the local `uint32_t* vec1` suggests the uint32_t specialization of
// dense_dot - confirm.
861  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
862 {
863  ASSERT(vec2_len == num_features)
864 
865  int32_t vlen;
866  bool vfree;
867  uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
868 
869  ASSERT(vlen == num_features)
870  float64_t result = 0;
871 
872  for (int32_t i = 0; i < num_features; i++)
873  result += vec1[i] * vec2[i];
874 
875  free_feature_vector(vec1, vec_idx1, vfree);
876 
877  return result;
878 }
879 
// Dense dot product: sums vec1[i] * vec2[i] over num_features entries into a
// float64_t and releases the feature vector afterwards.
// NOTE(review): the first signature line (listing line 880) is absent from this
// extract; the local `int64_t* vec1` suggests the int64_t specialization of
// dense_dot - confirm.
881  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
882 {
883  ASSERT(vec2_len == num_features)
884 
885  int32_t vlen;
886  bool vfree;
887  int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
888 
889  ASSERT(vlen == num_features)
890  float64_t result = 0;
891 
892  for (int32_t i = 0; i < num_features; i++)
893  result += vec1[i] * vec2[i];
894 
895  free_feature_vector(vec1, vec_idx1, vfree);
896 
897  return result;
898 }
899 
// Dense dot product: sums vec1[i] * vec2[i] over num_features entries into a
// float64_t and releases the feature vector afterwards.
// NOTE(review): the first signature line (listing line 900) is absent from this
// extract; the local `uint64_t* vec1` suggests the uint64_t specialization of
// dense_dot - confirm.
901  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
902 {
903  ASSERT(vec2_len == num_features)
904 
905  int32_t vlen;
906  bool vfree;
907  uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
908 
909  ASSERT(vlen == num_features)
910  float64_t result = 0;
911 
912  for (int32_t i = 0; i < num_features; i++)
913  result += vec1[i] * vec2[i];
914 
915  free_feature_vector(vec1, vec_idx1, vfree);
916 
917  return result;
918 }
919 
// Dense dot product: sums vec1[i] * vec2[i] over num_features entries into a
// float64_t and releases the feature vector afterwards.
// NOTE(review): the first signature line (listing line 920) is absent from this
// extract; the local `float32_t* vec1` suggests the float32_t specialization of
// dense_dot - confirm.
921  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
922 {
923  ASSERT(vec2_len == num_features)
924 
925  int32_t vlen;
926  bool vfree;
927  float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
928 
929  ASSERT(vlen == num_features)
930  float64_t result = 0;
931 
932  for (int32_t i = 0; i < num_features; i++)
933  result += vec1[i] * vec2[i];
934 
935  free_feature_vector(vec1, vec_idx1, vfree);
936 
937  return result;
938 }
939 
// Dense dot product computed with linalg::dot on two non-owning SGVector views:
// one over the feature vector, one over `vec2`.
// NOTE(review): the first signature line (listing line 940) is absent from this
// extract; the local `float64_t* vec1` suggests the float64_t specialization of
// dense_dot - confirm.
941  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
942 {
943  ASSERT(vec2_len == num_features)
944 
945  int32_t vlen;
946  bool vfree;
947  float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
948  SGVector<float64_t> sg_vec1(vec1, vlen, false);
949 
950  ASSERT(vlen == num_features)
// const is cast away only to build the SGVector view of vec2.
951  SGVector<float64_t> tmp(const_cast<float64_t*>(vec2), vec2_len, false);
952  float64_t result = linalg::dot(sg_vec1, tmp);
953 
954  free_feature_vector(vec1, vec_idx1, vfree);
955 
956  return result;
957 }
958 
// Dense dot product: sums vec1[i] * vec2[i] over num_features entries into a
// float64_t and releases the feature vector afterwards.
// NOTE(review): the first signature line (listing line 959) is absent from this
// extract; the local `floatmax_t* vec1` suggests the floatmax_t specialization of
// dense_dot - confirm.
960  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
961 {
962  ASSERT(vec2_len == num_features)
963 
964  int32_t vlen;
965  bool vfree;
966  floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
967 
968  ASSERT(vlen == num_features)
969  float64_t result = 0;
970 
971  for (int32_t i = 0; i < num_features; i++)
972  result += vec1[i] * vec2[i];
973 
974  free_feature_vector(vec1, vec_idx1, vfree);
975 
976  return result;
977 }
978 
979 template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs)
980 {
981  if ( num_features != rhs->num_features || num_vectors != rhs->num_vectors )
982  return false;
983 
984  ST* vec1;
985  ST* vec2;
986  int32_t v1len, v2len;
987  bool v1free, v2free, stop = false;
988 
989  for (int32_t i = 0; i < num_vectors; i++)
990  {
991  vec1 = get_feature_vector(i, v1len, v1free);
992  vec2 = rhs->get_feature_vector(i, v2len, v2free);
993 
994  if (v1len!=v2len)
995  stop = true;
996 
997  for (int32_t j=0; j<v1len; j++)
998  {
999  if (vec1[j]!=vec2[j])
1000  stop = true;
1001  }
1002 
1003  free_feature_vector(vec1, i, v1free);
1004  free_feature_vector(vec2, i, v2free);
1005 
1006  if (stop)
1007  return false;
1008  }
1009 
1010  return true;
1011 }
1012 
// Create a new CDenseFeatures<ST> whose matrix holds the vectors of this object
// followed by those of every element of `others`, in list order.
// Every element must be a CDenseFeatures<ST> with the same num_features.
// NOTE(review): the signature (listing line 1014) is absent from this extract.
1013 template <class ST>
1015 {
1016  SG_DEBUG("Entering.\n");
1017 
1018  REQUIRE(others!=nullptr, "The list of other feature instances is not initialized!\n");
1019 
1020  auto current=others->get_first_element();
1021  auto total_num_vectors=get_num_vectors();
1022  auto unref_required=others->get_delete_data();
1023 
// First pass: validate every element and count the total number of vectors.
1024  while (current!=nullptr)
1025  {
1026  auto casted=dynamic_cast<CDenseFeatures<ST>*>(current);
1027 
1028  REQUIRE(casted!=nullptr, "Provided object's type (%s) must match own type (%s)!\n",
1029  current->get_name(), get_name());
1030  REQUIRE(num_features==casted->num_features,
1031  "Provided feature object has different dimension (%d) than this one (%d)!\n",
1032  casted->num_features, num_features);
1033 
1034  total_num_vectors+=casted->get_num_vectors();
1035 
1036  if (unref_required)
1037  SG_UNREF(current);
1038 
1039  current=others->get_next_element();
1040  }
1041 
// Second pass: copy this object's vectors first, then each element's vectors at
// the running column offset.
1042  SGMatrix<ST> data(num_features, total_num_vectors);
1043  index_t num_copied=0;
1044  copy_feature_matrix(data, num_copied);
1045  num_copied+=get_num_vectors();
1046 
1047  current=others->get_first_element();
1048 
1049  while (current!=nullptr)
1050  {
// static_cast is used here; the type was already checked in the first pass.
1051  auto casted=static_cast<CDenseFeatures<ST>*>(current);
1052  casted->copy_feature_matrix(data, num_copied);
1053  num_copied+=casted->get_num_vectors();
1054 
1055  if (unref_required)
1056  SG_UNREF(current);
1057 
1058  current=others->get_next_element();
1059  }
1060 
1061  auto result=new CDenseFeatures<ST>(data);
1062 
1063  SG_DEBUG("Leaving.\n");
1064  return result;
1065 }
1066 
// Convenience overload: wraps `other` in a one-element CList and delegates to the
// list-based create_merged_copy().
// NOTE(review): the signature (listing line 1068) is absent from this extract.
1067 template <class ST>
1069 {
1070  auto list=some<CList>();
1071  list->append_element(other);
1072  return create_merged_copy(list);
1073 }
1074 
// Load an SGMatrix<ST> from `loader` and install it via set_feature_matrix().
// NOTE(review): the signature (listing line 1076) is absent from this extract.
1075 template<class ST>
1077 {
1078  SGMatrix<ST> matrix;
1079  matrix.load(loader);
1080  set_feature_matrix(matrix);
1081 }
1082 
// Write the stored feature_matrix to `writer` using SGMatrix::save().
// NOTE(review): the signature (listing line 1084) is absent from this extract.
1083 template<class ST>
1085 {
1086  feature_matrix.save(writer);
1087 }
1088 
// Cast a generic feature object to CDenseFeatures<ST>*.
// Only the feature class is checked (it must be C_DENSE); the cast itself is an
// unchecked C-style cast, so the element type ST is not verified in the visible lines.
// NOTE(review): the signature (listing line 1089) is absent from this extract.
1090 {
1091  REQUIRE(base_features->get_feature_class() == C_DENSE,
1092  "base_features must be of dynamic type CDenseFeatures\n")
1093 
1094  return (CDenseFeatures< ST >*) base_features;
1095 }
1096 
// Explicit instantiations of CDenseFeatures for each element type listed below.
1097 template class CDenseFeatures<bool>;
1098 template class CDenseFeatures<char>;
1099 template class CDenseFeatures<int8_t>;
1100 template class CDenseFeatures<uint8_t>;
1101 template class CDenseFeatures<int16_t>;
1102 template class CDenseFeatures<uint16_t>;
1103 template class CDenseFeatures<int32_t>;
1104 template class CDenseFeatures<uint32_t>;
1105 template class CDenseFeatures<int64_t>;
1106 template class CDenseFeatures<uint64_t>;
1107 template class CDenseFeatures<float32_t>;
1108 template class CDenseFeatures<float64_t>;
1109 template class CDenseFeatures<floatmax_t>;
1110 }
virtual const char * get_name() const =0
SGVector< index_t > get_subset_idx() const
Definition: Subset.h:48
CSubsetStack * m_subset_stack
Definition: Features.h:378
#define SG_INFO(...)
Definition: SGIO.h:117
virtual int32_t get_dim_feature_space() const
SGMatrix< ST > get_feature_matrix()
void set_feature_matrix(SGMatrix< ST > matrix)
CSGObject * get_next_element()
Definition: List.h:185
virtual float64_t dense_dot(int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len)
virtual CFeatures * copy_subset(SGVector< index_t > indices)
static T max(T a, T b)
Definition: Math.h:149
int32_t index_t
Definition: common.h:72
void set_num_vectors(int32_t num)
index_t get_size() const
Definition: SubsetStack.h:80
int32_t num_features
number of features in cache
void set_preprocessed(int32_t num)
Definition: Features.cpp:143
CDenseFeatures(int32_t size=0)
void set_feature_vector(SGVector< ST > vector, int32_t num)
void feature_subset(int32_t *idx, int32_t idx_len)
void obtain_from_dot(CDotFeatures *df)
void load(CFile *loader)
Definition: SGMatrix.cpp:1173
virtual int32_t get_num_vectors() const =0
virtual void * get_feature_iterator(int32_t vector_index)
virtual bool reshape(int32_t p_num_features, int32_t p_num_vectors)
#define SG_ERROR(...)
Definition: SGIO.h:128
#define REQUIRE(x,...)
Definition: SGIO.h:181
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:138
T dot(const SGVector< T > &a, const SGVector< T > &b)
CPreprocessor * get_preprocessor(int32_t num) const
Definition: Features.cpp:93
virtual EFeatureType get_feature_type() const
virtual bool is_equal(CDenseFeatures *rhs)
int32_t get_num_features() const
bool get_delete_data()
Definition: List.h:575
Features that support dot products among other operations.
Definition: DotFeatures.h:44
void unlock_entry(int64_t number)
Definition: Cache.h:156
#define SG_REF(x)
Definition: SGObject.h:52
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
class to add subset support to another class. A CSubsetStackStack instance should be added and wrappe...
Definition: SubsetStack.h:37
virtual int32_t get_dim_feature_space() const =0
virtual bool get_next_feature(int32_t &index, float64_t &value, void *iterator)
int32_t get_num_preprocessors() const
Definition: Features.cpp:155
T * set_entry(int64_t number)
Definition: Cache.h:169
void save(CFile *saver)
Definition: SGMatrix.cpp:1195
CSubset * get_last_subset() const
Definition: SubsetStack.h:98
CSGObject * get_first_element()
Definition: List.h:151
virtual float64_t dot(int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2)
#define ASSERT(x)
Definition: SGIO.h:176
int32_t num_vectors
number of vectors in cache
virtual void remove_all_subsets()
Definition: SubsetStack.cpp:62
Template class DensePreprocessor, base class for preprocessors (cf. CPreprocessor) that apply to CDen...
SGMatrix< ST > feature_matrix
bool is_preprocessed(int32_t num) const
Definition: Features.cpp:149
double float64_t
Definition: common.h:60
virtual const char * get_name() const
long double floatmax_t
Definition: common.h:61
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)
A File access base class.
Definition: File.h:34
index_t num_rows
Definition: SGMatrix.h:495
shogun vector
virtual bool apply_preprocessor(bool force_preprocessing=false)
index_t subset_idx_conversion(index_t idx) const
Definition: SubsetStack.h:105
virtual EFeatureClass get_feature_class() const =0
T * lock_entry(int64_t number)
Definition: Cache.h:140
index_t num_cols
Definition: SGMatrix.h:497
virtual CFeatures * shallow_subset_copy()
CCache< ST > * feature_cache
CDenseFeatures< ST > * get_transposed()
SGMatrix< ST > steal_feature_matrix()
float float32_t
Definition: common.h:59
virtual CFeatures * duplicate() const
void set_num_features(int32_t num)
The class DenseFeatures implements dense feature matrices.
Definition: LDA.h:40
#define SG_UNREF(x)
Definition: SGObject.h:53
#define SG_DEBUG(...)
Definition: SGIO.h:106
bool equals(const SGMatrix< T > &other) const
Definition: SGMatrix.cpp:144
virtual EFeatureClass get_feature_class() const
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
static CDenseFeatures * obtain_from_generic(CFeatures *const base_features)
#define SG_SDEBUG(...)
Definition: SGIO.h:153
int32_t get_cache_size() const
Definition: Features.cpp:160
void vector_subset(int32_t *idx, int32_t idx_len)
The class Features is the base class of all feature objects.
Definition: Features.h:69
virtual SGMatrix< ST > apply_to_feature_matrix(CFeatures *features)=0
virtual CFeatures * copy_dimension_subset(SGVector< index_t > dims)
void copy_feature_matrix(SGMatrix< ST > target, index_t column_offset=0) const
SGVector< float64_t > get_computed_dot_feature_vector(int32_t num)
virtual bool has_subsets() const
Definition: SubsetStack.h:89
virtual void save(CFile *saver)
virtual void load(CFile *loader)
void clean_preprocessors()
Definition: Features.cpp:116
virtual ST * compute_feature_vector(int32_t num, int32_t &len, ST *target=NULL)
#define SG_ADD(...)
Definition: SGObject.h:93
virtual int32_t get_nnz_features_for_vector(int32_t num)
T max(const Container< T > &a)
virtual SGVector< ST > apply_to_feature_vector(SGVector< ST > vector)=0
virtual int32_t get_num_vectors() const
static T min(T a, T b)
Definition: Math.h:138
CFeatures * create_merged_copy(CList *other)
virtual void add_subset(SGVector< index_t > subset)
Definition: Features.cpp:310
virtual EFeatureType get_feature_type() const =0
int64_t size() const
Definition: SGMatrix.h:275
Class List implements a doubly connected list for low-level-objects.
Definition: List.h:84
index_t vlen
Definition: SGVector.h:571
static void vec1_plus_scalar_times_vec2(T *vec1, const T scalar, const T *vec2, int32_t n)
x=x+alpha*y
Definition: SGVector.cpp:586
#define GET_FEATURE_TYPE(f_type, sg_type)
static T abs(T a)
Definition: Math.h:161
virtual void free_feature_iterator(void *iterator)

SHOGUN Machine Learning Toolbox - Documentation