SHOGUN  6.1.3
WeightedDegreeStringKernel.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 1999-2009 Soeren Sonnenburg
 * Written (W) 1999-2008 Gunnar Raetsch
 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <shogun/base/Parallel.h>
#include <shogun/base/Parameter.h>
#include <shogun/base/progress.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Trie.h>
#include <shogun/lib/common.h>

#include <shogun/features/Features.h>
#include <shogun/features/StringFeatures.h>
#include <shogun/kernel/normalizer/FirstElementKernelNormalizer.h>
#include <shogun/kernel/string/WeightedDegreeStringKernel.h>

#ifdef HAVE_PTHREAD
#include <pthread.h>
#endif

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct S_THREAD_PARAM_WD
{
    int32_t* vec;
    float64_t* result;
    float64_t* weights;
    CWeightedDegreeStringKernel* kernel;
    CTrie<DNATrie>* tries;
    float64_t factor;
    int32_t j;
    int32_t start;
    int32_t end;
    int32_t length;
    int32_t* vec_idx;
};
#endif // DOXYGEN_SHOULD_SKIP_THIS

CWeightedDegreeStringKernel::CWeightedDegreeStringKernel()
: CStringKernel<char>()
{
    init();
}

CWeightedDegreeStringKernel::CWeightedDegreeStringKernel(
    int32_t d, EWDKernType t)
: CStringKernel<char>()
{
    init();

    degree=d;
    type=t;

    if (type!=E_EXTERNAL)
        set_wd_weights_by_type(type);
}

CWeightedDegreeStringKernel::CWeightedDegreeStringKernel(SGVector<float64_t> w)
: CStringKernel<char>(10)
{
    init();

    type=E_EXTERNAL;
    degree=w.vlen;

    weights=SG_MALLOC(float64_t, degree*(1+max_mismatch));
    weights_degree=degree;
    weights_length=1+max_mismatch;

    for (int32_t i=0; i<degree*(1+max_mismatch); i++)
        weights[i]=w.vector[i];
}

CWeightedDegreeStringKernel::CWeightedDegreeStringKernel(
    CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t d)
: CStringKernel<char>(10)
{
    init();
    degree=d;
    type=E_WD;
    set_wd_weights_by_type(type);
    set_normalizer(new CFirstElementKernelNormalizer());
    init(l, r);
}

CWeightedDegreeStringKernel::~CWeightedDegreeStringKernel()
{
    cleanup();

    SG_FREE(weights);
    weights=NULL;
    weights_degree=0;
    weights_length=0;

    SG_FREE(block_weights);
    block_weights=NULL;

    SG_FREE(position_weights);
    position_weights=NULL;

    SG_FREE(weights_buffer);
    weights_buffer=NULL;
}

void CWeightedDegreeStringKernel::remove_lhs()
{
    SG_DEBUG("deleting CWeightedDegreeStringKernel optimization\n")
    delete_optimization();

    if (tries!=NULL)
        tries->destroy();

    CKernel::remove_lhs();
}

void CWeightedDegreeStringKernel::create_empty_tries()
{
    ASSERT(lhs)

    seq_length=((CStringFeatures<char>*) lhs)->get_max_vector_length();

    if (tries!=NULL)
    {
        tries->destroy() ;
        tries->create(seq_length, max_mismatch==0) ;
    }
}

bool CWeightedDegreeStringKernel::init(CFeatures* l, CFeatures* r)
{
    int32_t lhs_changed=(lhs!=l);
    int32_t rhs_changed=(rhs!=r);

    CStringKernel<char>::init(l, r);

    SG_DEBUG("lhs_changed: %i\n", lhs_changed)
    SG_DEBUG("rhs_changed: %i\n", rhs_changed)

    CStringFeatures<char>* sf_l=(CStringFeatures<char>*) l;
    CStringFeatures<char>* sf_r=(CStringFeatures<char>*) r;

    int32_t len=sf_l->get_max_vector_length();
    if (lhs_changed && !sf_l->have_same_length(len))
        SG_ERROR("All strings in WD kernel must have same length (lhs wrong)!\n")

    if (rhs_changed && !sf_r->have_same_length(len))
        SG_ERROR("All strings in WD kernel must have same length (rhs wrong)!\n")

    SG_UNREF(alphabet);
    alphabet=sf_l->get_alphabet();
    CAlphabet* ralphabet=sf_r->get_alphabet();

    if (!((alphabet->get_alphabet()==DNA) || (alphabet->get_alphabet()==RNA)))
        properties &= ((uint64_t) (-1)) ^ (KP_LINADD | KP_BATCHEVALUATION);

    ASSERT(ralphabet->get_alphabet()==alphabet->get_alphabet())
    SG_UNREF(ralphabet);

    if (tries!=NULL) {
        tries->delete_trees(max_mismatch==0);
        SG_UNREF(tries);
    }
    tries=new CTrie<DNATrie>(degree, max_mismatch==0);
    create_empty_tries();

    init_block_weights();

    return init_normalizer();
}

void CWeightedDegreeStringKernel::cleanup()
{
    SG_DEBUG("deleting CWeightedDegreeStringKernel optimization\n")
    delete_optimization();

    SG_FREE(block_weights);
    block_weights=NULL;

    if (tries!=NULL)
    {
        tries->destroy();
        SG_UNREF(tries);
        tries=NULL;
    }

    seq_length=0;
    tree_initialized = false;

    SG_UNREF(alphabet);
    alphabet=NULL;

    CKernel::cleanup();
}

bool CWeightedDegreeStringKernel::init_optimization(int32_t count, int32_t* IDX, float64_t* alphas, int32_t tree_num)
{
    if (tree_num<0)
        SG_DEBUG("deleting CWeightedDegreeStringKernel optimization\n")

    delete_optimization();

    if (tree_num<0)
        SG_DEBUG("initializing CWeightedDegreeStringKernel optimization\n")

    for (auto i : progress(range(count), *this->io))
    {
        if (tree_num<0)
        {
            if (max_mismatch==0)
                add_example_to_tree(IDX[i], alphas[i]) ;
            else
                add_example_to_tree_mismatch(IDX[i], alphas[i]) ;

            //SG_DEBUG("number of used trie nodes: %i\n", tries.get_num_used_nodes())
        }
        else
        {
            if (max_mismatch==0)
                add_example_to_single_tree(IDX[i], alphas[i], tree_num) ;
            else
                add_example_to_single_tree_mismatch(IDX[i], alphas[i], tree_num) ;
        }
    }

    //tries.compact_nodes(NO_CHILD, 0, weights) ;

    set_is_initialized(true) ;
    return true ;
}

bool CWeightedDegreeStringKernel::delete_optimization()
{
    if (get_is_initialized())
    {
        if (tries!=NULL)
            tries->delete_trees(max_mismatch==0);
        set_is_initialized(false);
        return true;
    }

    return false;
}

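/* The following compute_* helpers evaluate the weighted degree kernel
 * k(x,y) = sum_i sum_{j<degree} weights[j] * I(x[i..i+j]==y[i..i+j]),
 * optionally scaled per position via position_weights. compute_with_mismatch()
 * additionally tolerates up to max_mismatch mismatching symbols within a
 * substring, drawing its weights from weights[j+degree*mismatches]. */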
float64_t CWeightedDegreeStringKernel::compute_with_mismatch(
    char* avec, int32_t alen, char* bvec, int32_t blen)
{
    float64_t sum = 0.0;

    for (int32_t i=0; i<alen; i++)
    {
        float64_t sumi = 0.0;
        int32_t mismatches=0;

        for (int32_t j=0; (i+j<alen) && (j<degree); j++)
        {
            if (avec[i+j]!=bvec[i+j])
            {
                mismatches++ ;
                if (mismatches>max_mismatch)
                    break ;
            } ;
            sumi += weights[j+degree*mismatches];
        }
        if (position_weights!=NULL)
            sum+=position_weights[i]*sumi ;
        else
            sum+=sumi ;
    }
    return sum ;
}

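/* Block formulation: with max_mismatch==0 and a single weight per degree, the
 * same kernel value can be obtained from the lengths of maximal matching
 * blocks alone; block_weights[l] holds the total weight a matching block of
 * length l+1 contributes (see the init_block_weights_* helpers below). */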
float64_t CWeightedDegreeStringKernel::compute_using_block(
    char* avec, int32_t alen, char* bvec, int32_t blen)
{
    ASSERT(alen==blen)

    float64_t sum=0;
    int32_t match_len=-1;

    for (int32_t i=0; i<alen; i++)
    {
        if (avec[i]==bvec[i])
            match_len++;
        else
        {
            if (match_len>=0)
                sum+=block_weights[match_len];
            match_len=-1;
        }
    }

    if (match_len>=0)
        sum+=block_weights[match_len];

    return sum;
}

float64_t CWeightedDegreeStringKernel::compute_without_mismatch(
    char* avec, int32_t alen, char* bvec, int32_t blen)
{
    float64_t sum = 0.0;

    for (int32_t i=0; i<alen; i++)
    {
        float64_t sumi = 0.0;

        for (int32_t j=0; (i+j<alen) && (j<degree); j++)
        {
            if (avec[i+j]!=bvec[i+j])
                break ;
            sumi += weights[j];
        }
        if (position_weights!=NULL)
            sum+=position_weights[i]*sumi ;
        else
            sum+=sumi ;
    }
    return sum ;
}

float64_t CWeightedDegreeStringKernel::compute_without_mismatch_matrix(
    char* avec, int32_t alen, char* bvec, int32_t blen)
{
    float64_t sum = 0.0;

    for (int32_t i=0; i<alen; i++)
    {
        float64_t sumi=0.0;
        for (int32_t j=0; (i+j<alen) && (j<degree); j++)
        {
            if (avec[i+j]!=bvec[i+j])
                break;
            sumi += weights[i*degree+j];
        }
        if (position_weights!=NULL)
            sum += position_weights[i]*sumi ;
        else
            sum += sumi ;
    }

    return sum ;
}

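/* compute() dispatches to the cheapest applicable variant: the block
 * formulation when enabled and applicable, otherwise the mismatch, plain or
 * position-dependent (weight matrix) code paths. */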
float64_t CWeightedDegreeStringKernel::compute(int32_t idx_a, int32_t idx_b)
{
    int32_t alen, blen;
    bool free_avec, free_bvec;
    char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
    char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
    float64_t result=0;

    if (max_mismatch==0 && length==0 && block_computation)
        result=compute_using_block(avec, alen, bvec, blen);
    else
    {
        if (max_mismatch>0)
            result=compute_with_mismatch(avec, alen, bvec, blen);
        else if (length==0)
            result=compute_without_mismatch(avec, alen, bvec, blen);
        else
            result=compute_without_mismatch_matrix(avec, alen, bvec, blen);
    }
    ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
    ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);

    return result;
}

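/* The add_example_to_*tree functions implement the linadd optimization:
 * support vectors are folded into tries (all positions, or a single tree_num)
 * after remapping the characters to the binary DNA/RNA alphabet, so that later
 * evaluations only traverse the tries instead of summing over all support
 * vectors. */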
void CWeightedDegreeStringKernel::add_example_to_tree(
    int32_t idx, float64_t alpha)
{
    ASSERT(alphabet)
    ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA)

    int32_t len=0;
    bool free_vec;
    char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
    ASSERT(max_mismatch==0)
    int32_t *vec=SG_MALLOC(int32_t, len);

    for (int32_t i=0; i<len; i++)
        vec[i]=alphabet->remap_to_bin(char_vec[i]);
    ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);

    if (length == 0 || max_mismatch > 0)
    {
        for (int32_t i=0; i<len; i++)
        {
            float64_t alpha_pw=alpha;
            /*if (position_weights!=NULL)
                alpha_pw *= position_weights[i] ;*/
            if (alpha_pw==0.0)
                continue;
            ASSERT(tries)
            tries->add_to_trie(i, 0, vec, normalizer->normalize_lhs(alpha_pw, idx), weights, (length!=0));
        }
    }
    else
    {
        for (int32_t i=0; i<len; i++)
        {
            float64_t alpha_pw=alpha;
            /*if (position_weights!=NULL)
                alpha_pw = alpha*position_weights[i] ;*/
            if (alpha_pw==0.0)
                continue ;
            ASSERT(tries)
            tries->add_to_trie(i, 0, vec, normalizer->normalize_lhs(alpha_pw, idx), weights, (length!=0));
        }
    }
    SG_FREE(vec);
    tree_initialized=true ;
}

void CWeightedDegreeStringKernel::add_example_to_single_tree(
    int32_t idx, float64_t alpha, int32_t tree_num)
{
    ASSERT(alphabet)
    ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA)

    int32_t len;
    bool free_vec;
    char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
    ASSERT(max_mismatch==0)
    int32_t *vec = SG_MALLOC(int32_t, len);

    for (int32_t i=tree_num; i<tree_num+degree && i<len; i++)
        vec[i]=alphabet->remap_to_bin(char_vec[i]);
    ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);

    ASSERT(tries)
    if (alpha!=0.0)
        tries->add_to_trie(tree_num, 0, vec, normalizer->normalize_lhs(alpha, idx), weights, (length!=0));

    SG_FREE(vec);
    tree_initialized=true ;
}

void CWeightedDegreeStringKernel::add_example_to_tree_mismatch(int32_t idx, float64_t alpha)
{
    ASSERT(tries)
    ASSERT(alphabet)
    ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA)

    int32_t len ;
    bool free_vec;
    char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);

    int32_t *vec = SG_MALLOC(int32_t, len);

    for (int32_t i=0; i<len; i++)
        vec[i]=alphabet->remap_to_bin(char_vec[i]);
    ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);

    for (int32_t i=0; i<len; i++)
    {
        if (alpha!=0.0)
            tries->add_example_to_tree_mismatch_recursion(NO_CHILD, i, normalizer->normalize_lhs(alpha, idx), &vec[i], len-i, 0, 0, max_mismatch, weights);
    }

    SG_FREE(vec);
    tree_initialized=true ;
}

void CWeightedDegreeStringKernel::add_example_to_single_tree_mismatch(
    int32_t idx, float64_t alpha, int32_t tree_num)
{
    ASSERT(tries)
    ASSERT(alphabet)
    ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA)

    int32_t len=0;
    bool free_vec;
    char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
    int32_t *vec=SG_MALLOC(int32_t, len);

    for (int32_t i=tree_num; i<len && i<tree_num+degree; i++)
        vec[i]=alphabet->remap_to_bin(char_vec[i]);
    ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);

    if (alpha!=0.0)
    {
        tries->add_example_to_tree_mismatch_recursion(
            NO_CHILD, tree_num, normalizer->normalize_lhs(alpha, idx), &vec[tree_num], len-tree_num,
            0, 0, max_mismatch, weights);
    }

    SG_FREE(vec);
    tree_initialized=true;
}

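/* compute_by_tree() scores one rhs example against the previously built tries;
 * the LevelContrib variant splits the score into per-degree/position blocks of
 * size mkl_stepsize, as used for multiple kernel learning. */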
float64_t CWeightedDegreeStringKernel::compute_by_tree(int32_t idx)
{
    ASSERT(alphabet)
    ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA)

    int32_t len=0;
    bool free_vec;
    char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec);
    ASSERT(char_vec && len>0)
    int32_t *vec=SG_MALLOC(int32_t, len);

    for (int32_t i=0; i<len; i++)
        vec[i]=alphabet->remap_to_bin(char_vec[i]);
    ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);

    float64_t sum=0;
    ASSERT(tries)
    for (int32_t i=0; i<len; i++)
        sum+=tries->compute_by_tree_helper(vec, len, i, i, i, weights, (length!=0));

    SG_FREE(vec);
    return normalizer->normalize_rhs(sum, idx);
}

void CWeightedDegreeStringKernel::compute_by_tree(
    int32_t idx, float64_t* LevelContrib)
{
    ASSERT(alphabet)
    ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA)

    int32_t len ;
    bool free_vec;
    char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec);

    int32_t *vec = SG_MALLOC(int32_t, len);

    for (int32_t i=0; i<len; i++)
        vec[i]=alphabet->remap_to_bin(char_vec[i]);
    ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);

    ASSERT(tries)
    for (int32_t i=0; i<len; i++)
    {
        tries->compute_by_tree_helper(vec, len, i, i, i, LevelContrib,
            normalizer->normalize_rhs(1.0, idx),
            mkl_stepsize, weights, (length!=0));
    }

    SG_FREE(vec);
}

float64_t* CWeightedDegreeStringKernel::compute_abs_weights(int32_t &len)
{
    ASSERT(tries)
    return tries->compute_abs_weights(len);
}

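/* set_wd_weights_by_type() installs the standard WD weighting: weights[k]
 * proportional to degree-k, normalized to sum to one; for j<=max_mismatch
 * (and j<=k) the discounted entries weights[k+j*degree]=weights[k]/(nchoosek(k+1,j)*3^j)
 * are filled in. A non-negative which_degree restricts the kernel to that
 * single degree. */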
bool CWeightedDegreeStringKernel::set_wd_weights_by_type(EWDKernType p_type)
{
    ASSERT(degree>0)
    ASSERT(p_type==E_WD)

    SG_FREE(weights);
    weights=SG_MALLOC(float64_t, degree);
    weights_degree=degree;
    weights_length=1;

    if (weights)
    {
        int32_t i;
        float64_t sum=0;
        for (i=0; i<degree; i++)
        {
            weights[i]=degree-i;
            sum+=weights[i];
        }
        for (i=0; i<degree; i++)
            weights[i]/=sum;

        for (i=0; i<degree; i++)
        {
            for (int32_t j=1; j<=max_mismatch; j++)
            {
                if (j<i+1)
                {
                    int32_t nk=CMath::nchoosek(i+1, j);
                    weights[i+j*degree]=weights[i]/(nk*CMath::pow(3.0,j));
                }
                else
                    weights[i+j*degree]= 0;
            }
        }

        if (which_degree>=0)
        {
            ASSERT(which_degree<degree)
            for (i=0; i<degree; i++)
            {
                if (i!=which_degree)
                    weights[i]=0;
                else
                    weights[i]=1;
            }
        }
        return true;
    }
    else
        return false;
}

bool CWeightedDegreeStringKernel::set_weights(SGMatrix<float64_t> new_weights)
{
    float64_t* ws=new_weights.matrix;
    int32_t d=new_weights.num_rows;
    int32_t len=new_weights.num_cols;

    if (d!=degree || len<0)
        SG_ERROR("WD: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree)

    degree=d;
    length=len;

    if (len <= 0)
        len=1;

    weights_degree=degree;
    weights_length=len+max_mismatch;

    SG_DEBUG("Creating weights of size %dx%d\n", weights_degree, weights_length)
    int32_t num_weights=weights_degree*weights_length;
    SG_FREE(weights);
    weights=SG_MALLOC(float64_t, num_weights);

    for (int32_t i=0; i<degree*len; i++)
        weights[i]=ws[i];

    return true;
}

bool CWeightedDegreeStringKernel::set_position_weights(
    float64_t* pws, int32_t len)
{
    if (len==0)
    {
        SG_FREE(position_weights);
        position_weights=NULL;
        ASSERT(tries)
        tries->set_position_weights(position_weights);
    }

    if (seq_length!=len)
        SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len)

    SG_FREE(position_weights);
    position_weights=SG_MALLOC(float64_t, len);
    position_weights_len=len;
    ASSERT(tries)
    tries->set_position_weights(position_weights);

    if (position_weights)
    {
        for (int32_t i=0; i<len; i++)
            position_weights[i]=pws[i];
        return true;
    }
    else
        return false;
}

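/* The init_block_weights_* helpers precompute block_weights[l], the weight of
 * a maximal matching block of length l+1 used by compute_using_block(), for
 * the different weighting schemes; init_block_weights() dispatches on the
 * kernel type. */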
bool CWeightedDegreeStringKernel::init_block_weights_from_wd()
{
    SG_FREE(block_weights);
    block_weights=SG_MALLOC(float64_t, CMath::max(seq_length,degree));

    int32_t k;
    float64_t d=degree; // use float to evade rounding errors below

    for (k=0; k<degree; k++)
        block_weights[k]=
            (-CMath::pow(k, 3)+(3*d-3)*CMath::pow(k, 2)+(9*d-2)*k+6*d)/(3*d*(d+1));
    for (k=degree; k<seq_length; k++)
        block_weights[k]=(-d+3*k+4)/3;

    return true;
}

bool CWeightedDegreeStringKernel::init_block_weights_from_wd_external()
{
    ASSERT(weights)
    SG_FREE(block_weights);
    block_weights=SG_MALLOC(float64_t, CMath::max(seq_length,degree));

    int32_t i=0;
    block_weights[0]=weights[0];
    for (i=1; i<CMath::max(seq_length,degree); i++)
        block_weights[i]=0;

    for (i=1; i<CMath::max(seq_length,degree); i++)
    {
        block_weights[i]=block_weights[i-1];

        float64_t contrib=0;
        for (int32_t j=0; j<CMath::min(degree,i+1); j++)
            contrib+=weights[j];

        block_weights[i]+=contrib;
    }
    return true;
}

bool CWeightedDegreeStringKernel::init_block_weights_const()
{
    SG_FREE(block_weights);
    block_weights=SG_MALLOC(float64_t, seq_length);

    for (int32_t i=1; i<seq_length+1 ; i++)
        block_weights[i-1]=1.0/seq_length;
    return true;
}

bool CWeightedDegreeStringKernel::init_block_weights_linear()
{
    SG_FREE(block_weights);
    block_weights=SG_MALLOC(float64_t, seq_length);

    for (int32_t i=1; i<seq_length+1 ; i++)
        block_weights[i-1]=degree*i;

    return true;
}

bool CWeightedDegreeStringKernel::init_block_weights_sqpoly()
{
    SG_FREE(block_weights);
    block_weights=SG_MALLOC(float64_t, seq_length);

    for (int32_t i=1; i<degree+1 ; i++)
        block_weights[i-1]=((float64_t) i)*i;

    for (int32_t i=degree+1; i<seq_length+1 ; i++)
        block_weights[i-1]=i;

    return true;
}

bool CWeightedDegreeStringKernel::init_block_weights_cubicpoly()
{
    SG_FREE(block_weights);
    block_weights=SG_MALLOC(float64_t, seq_length);

    for (int32_t i=1; i<degree+1 ; i++)
        block_weights[i-1]=((float64_t) i)*i*i;

    for (int32_t i=degree+1; i<seq_length+1 ; i++)
        block_weights[i-1]=i;
    return true;
}

bool CWeightedDegreeStringKernel::init_block_weights_exp()
{
    SG_FREE(block_weights);
    block_weights=SG_MALLOC(float64_t, seq_length);

    for (int32_t i=1; i<degree+1 ; i++)
        block_weights[i-1]=exp(((float64_t) i/10.0));

    for (int32_t i=degree+1; i<seq_length+1 ; i++)
        block_weights[i-1]=i;

    return true;
}

bool CWeightedDegreeStringKernel::init_block_weights_log()
{
    SG_FREE(block_weights);
    block_weights=SG_MALLOC(float64_t, seq_length);

    for (int32_t i=1; i<degree+1 ; i++)
        block_weights[i-1]=CMath::pow(CMath::log(i), 2);

    for (int32_t i=degree+1; i<seq_length+1 ; i++)
        block_weights[i-1]=i-degree+1+CMath::pow(CMath::log(degree+1.0),2);

    return true;
}

bool CWeightedDegreeStringKernel::init_block_weights()
{
    switch (type)
    {
        case E_WD:
            return init_block_weights_from_wd();
        case E_EXTERNAL:
            return init_block_weights_from_wd_external();
        case E_BLOCK_CONST:
            return init_block_weights_const();
        case E_BLOCK_LINEAR:
            return init_block_weights_linear();
        case E_BLOCK_SQPOLY:
            return init_block_weights_sqpoly();
        case E_BLOCK_CUBICPOLY:
            return init_block_weights_cubicpoly();
        case E_BLOCK_EXP:
            return init_block_weights_exp();
        case E_BLOCK_LOG:
            return init_block_weights_log();
    };
    return false;
}

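/* Batch evaluation: for every sequence position j, compute_batch() builds a
 * single tree over the support vectors (init_optimization with tree_num=j) and
 * compute_batch_helper() then adds that position's contribution for a range of
 * rhs vectors; with HAVE_PTHREAD the vector range is split across threads. */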
void* CWeightedDegreeStringKernel::compute_batch_helper(void* p)
{
    S_THREAD_PARAM_WD* params = (S_THREAD_PARAM_WD*) p;
    int32_t j=params->j;
    CWeightedDegreeStringKernel* wd=params->kernel;
    CTrie<DNATrie>* tries=params->tries;
    float64_t* weights=params->weights;
    int32_t length=params->length;
    int32_t* vec=params->vec;
    float64_t* result=params->result;
    float64_t factor=params->factor;
    int32_t* vec_idx=params->vec_idx;

    CStringFeatures<char>* rhs_feat=((CStringFeatures<char>*) wd->get_rhs());
    CAlphabet* alpha=wd->alphabet;

    for (int32_t i=params->start; i<params->end; i++)
    {
        int32_t len=0;
        bool free_vec;
        char* char_vec=rhs_feat->get_feature_vector(vec_idx[i], len, free_vec);
        for (int32_t k=j; k<CMath::min(len,j+wd->get_degree()); k++)
            vec[k]=alpha->remap_to_bin(char_vec[k]);
        rhs_feat->free_feature_vector(char_vec, vec_idx[i], free_vec);

        ASSERT(tries)

        result[i]+=factor*
            wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, len, j, j, j, weights, (length!=0)), vec_idx[i]);
    }

    SG_UNREF(rhs_feat);

    return NULL;
}

void CWeightedDegreeStringKernel::compute_batch(
    int32_t num_vec, int32_t* vec_idx, float64_t* result, int32_t num_suppvec,
    int32_t* IDX, float64_t* alphas, float64_t factor)
{
    ASSERT(tries)
    ASSERT(alphabet)
    ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA)
    ASSERT(rhs)
    ASSERT(num_vec<=rhs->get_num_vectors())
    ASSERT(num_vec>0)
    ASSERT(vec_idx)
    ASSERT(result)
    create_empty_tries();

    int32_t num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
    ASSERT(num_feat>0)
    // TODO: port to use OpenMP backend instead of pthread
#ifdef HAVE_PTHREAD
    int32_t num_threads=parallel->get_num_threads();
#else
    int32_t num_threads=1;
#endif
    ASSERT(num_threads>0)
    int32_t* vec=SG_MALLOC(int32_t, num_threads*num_feat);
    auto pb = progress(range(num_feat), *this->io);

    if (num_threads < 2)
    {
        // TODO: replace with the new signal
        // for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
        for (int32_t j = 0; j < num_feat; j++)
        {
            init_optimization(num_suppvec, IDX, alphas, j);
            S_THREAD_PARAM_WD params;
            params.vec=vec;
            params.result=result;
            params.weights=weights;
            params.kernel=this;
            params.tries=tries;
            params.factor=factor;
            params.j=j;
            params.start=0;
            params.end=num_vec;
            params.length=length;
            params.vec_idx=vec_idx;
            compute_batch_helper((void*) &params);

            pb.print_progress();
        }
        pb.complete();
    }
#ifdef HAVE_PTHREAD
    else
    {
        // TODO: replace with the new signal
        // for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
        for (int32_t j = 0; j < num_feat; j++)
        {
            init_optimization(num_suppvec, IDX, alphas, j);
            pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
            S_THREAD_PARAM_WD* params = SG_MALLOC(S_THREAD_PARAM_WD, num_threads);
            int32_t step= num_vec/num_threads;
            int32_t t;

            for (t=0; t<num_threads-1; t++)
            {
                params[t].vec=&vec[num_feat*t];
                params[t].result=result;
                params[t].weights=weights;
                params[t].kernel=this;
                params[t].tries=tries;
                params[t].factor=factor;
                params[t].j=j;
                params[t].start = t*step;
                params[t].end = (t+1)*step;
                params[t].length=length;
                params[t].vec_idx=vec_idx;
                pthread_create(&threads[t], NULL, CWeightedDegreeStringKernel::compute_batch_helper, (void*)&params[t]);
            }
            params[t].vec=&vec[num_feat*t];
            params[t].result=result;
            params[t].weights=weights;
            params[t].kernel=this;
            params[t].tries=tries;
            params[t].factor=factor;
            params[t].j=j;
            params[t].start=t*step;
            params[t].end=num_vec;
            params[t].length=length;
            params[t].vec_idx=vec_idx;
            compute_batch_helper((void*) &params[t]);

            for (t=0; t<num_threads-1; t++)
                pthread_join(threads[t], NULL);
            pb.print_progress();

            SG_FREE(params);
            SG_FREE(threads);
        }
        pb.complete();
    }
#endif

    SG_FREE(vec);

    //really also free memory as this can be huge on testing especially when
    //using the combined kernel
    tries->delete_trees(max_mismatch==0);
}

bool CWeightedDegreeStringKernel::set_max_mismatch(int32_t max)
{
    if (type==E_EXTERNAL && max!=0)
        return false;

    max_mismatch=max;

    if (lhs!=NULL && rhs!=NULL)
        return init(lhs, rhs);
    else
        return true;
}

void CWeightedDegreeStringKernel::init()
{
    weights=NULL;
    weights_degree=0;
    weights_length=0;

    position_weights=NULL;
    position_weights_len=0;

    weights_buffer=NULL;
    mkl_stepsize=1;
    degree=1;
    length=0;

    max_mismatch=0;
    seq_length=0;

    block_weights=NULL;
    block_computation=true;
    type=E_WD;
    which_degree=-1;
    tries=NULL;

    tree_initialized=false;
    alphabet=NULL;

    lhs=NULL;
    rhs=NULL;

    properties |= KP_LINADD | KP_BATCHEVALUATION;

    set_normalizer(new CFirstElementKernelNormalizer());

    m_parameters->add_matrix(&weights, &weights_degree, &weights_length,
        "weights", "WD Kernel weights.");
    m_parameters->add_vector(&position_weights, &position_weights_len,
        "position_weights",
        "Weights per position.");
    SG_ADD(&mkl_stepsize, "mkl_stepsize", "MKL step size.", MS_AVAILABLE);
    SG_ADD(&degree, "degree", "Order of WD kernel.", MS_AVAILABLE);
    SG_ADD(&max_mismatch, "max_mismatch",
        "Number of allowed mismatches.", MS_AVAILABLE);
    SG_ADD(&block_computation, "block_computation",
        "If block computation shall be used.", MS_NOT_AVAILABLE);
    SG_ADD((machine_int_t*) &type, "type",
        "WeightedDegree kernel type.", MS_AVAILABLE);
    SG_ADD(&which_degree, "which_degree",
        "The selected degree. All degrees are used by default (for value -1).",
        MS_AVAILABLE);
    SG_ADD((CSGObject**) &alphabet, "alphabet",
        "Alphabet of Features.", MS_NOT_AVAILABLE);
}