SHOGUN  6.1.3
FactorGraphModel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Shell Hu
8  * Copyright (C) 2013 Shell Hu
9  */
10 
16 
17 #include <unordered_map>
18 typedef std::unordered_map<int32_t, int32_t> factor_counts_type;
19 
20 using namespace shogun;
21 
24 {
25  init();
26 }
27 
29  EMAPInferType inf_type, bool verbose) : CStructuredModel(features, labels)
30 {
31  init();
32  m_inf_type = inf_type;
33  m_verbose = verbose;
34 }
35 
37 {
39 }
40 
41 void CFactorGraphModel::init()
42 {
43  SG_ADD((CSGObject**)&m_factor_types, "factor_types", "Array of factor types", MS_NOT_AVAILABLE);
44  SG_ADD(&m_w_cache, "w_cache", "Cache of global parameters", MS_NOT_AVAILABLE);
45  SG_ADD(&m_w_map, "w_map", "Parameter mapping", MS_NOT_AVAILABLE);
46 
49  m_verbose = false;
50 
52 }
53 
55 {
56  REQUIRE(ftype->get_w_dim() > 0, "%s::add_factor_type(): number of parameters can't be 0!\n",
57  get_name());
58 
59  // check whether this ftype has been added
60  int32_t id = ftype->get_type_id();
61  for (int32_t fi = 0; fi < m_factor_types->get_num_elements(); ++fi)
62  {
63  CFactorType* ft= dynamic_cast<CFactorType*>(m_factor_types->get_element(fi));
64  if (id == ft->get_type_id())
65  {
66  SG_UNREF(ft);
67  SG_PRINT("%s::add_factor_type(): factor_type (id = %d) has been added!\n",
68  get_name(), id);
69 
70  return;
71  }
72 
73  SG_UNREF(ft);
74  }
75 
76  SGVector<int32_t> w_map_cp = m_w_map.clone();
77  m_w_map.resize_vector(w_map_cp.size() + ftype->get_w_dim());
78 
79  for (int32_t mi = 0; mi < w_map_cp.size(); mi++)
80  {
81  m_w_map[mi] = w_map_cp[mi];
82  }
83  // add new mapping in the end
84  for (int32_t mi = w_map_cp.size(); mi < m_w_map.size(); mi++)
85  {
86  m_w_map[mi] = id;
87  }
88 
89  // push factor type
90  m_factor_types->push_back(ftype);
91 
92  // initialize w cache
93  fparams_to_w();
94 
95  if (m_verbose)
96  {
97  m_w_map.display_vector("add_factor_type(): m_w_map");
98  }
99 }
100 
101 void CFactorGraphModel::del_factor_type(const int32_t ftype_id)
102 {
103  int w_dim = 0;
104  // delete from m_factor_types
105  for (int32_t fi = 0; fi < m_factor_types->get_num_elements(); ++fi)
106  {
107  CFactorType* ftype = dynamic_cast<CFactorType*>(m_factor_types->get_element(fi));
108  if (ftype_id == ftype->get_type_id())
109  {
110  w_dim = ftype->get_w_dim();
111  SG_UNREF(ftype);
113  break;
114  }
115 
116  SG_UNREF(ftype);
117  }
118 
119  ASSERT(w_dim != 0);
120 
121  SGVector<int32_t> w_map_cp = m_w_map.clone();
122  m_w_map.resize_vector(w_map_cp.size() - w_dim);
123 
124  int ind = 0;
125  for (int32_t mi = 0; mi < w_map_cp.size(); mi++)
126  {
127  if (w_map_cp[mi] == ftype_id)
128  continue;
129 
130  m_w_map[ind++] = w_map_cp[mi];
131  }
132 
133  ASSERT(ind == m_w_map.size());
134 }
135 
137 {
139  return m_factor_types;
140 }
141 
142 CFactorType* CFactorGraphModel::get_factor_type(const int32_t ftype_id) const
143 {
144  for (int32_t fi = 0; fi < m_factor_types->get_num_elements(); ++fi)
145  {
146  CFactorType* ftype = dynamic_cast<CFactorType*>(m_factor_types->get_element(fi));
147  if (ftype_id == ftype->get_type_id())
148  return ftype;
149 
150  SG_UNREF(ftype);
151  }
152 
153  return NULL;
154 }
155 
157 {
158  return m_w_map.clone();
159 }
160 
162 {
163  return m_w_map.find(ftype_id);
164 }
165 
167 {
168  return m_w_map.size();
169 }
170 
172 {
173  REQUIRE(m_factor_types != NULL, "%s::fparams_to_w(): no factor types!\n", get_name());
174 
175  if (m_w_cache.size() != get_dim())
177 
178  int32_t offset = 0;
179  for (int32_t fi = 0; fi < m_factor_types->get_num_elements(); ++fi)
180  {
181  CFactorType* ftype = dynamic_cast<CFactorType*>(m_factor_types->get_element(fi));
182  int32_t w_dim = ftype->get_w_dim();
183  offset += w_dim;
184  SGVector<float64_t> fw = ftype->get_w();
186 
187  ASSERT(fw_map.size() == fw.size());
188 
189  for (int32_t wi = 0; wi < w_dim; wi++)
190  m_w_cache[fw_map[wi]] = fw[wi];
191 
192  SG_UNREF(ftype);
193  }
194 
195  ASSERT(offset == m_w_cache.size());
196 
197  return m_w_cache;
198 }
199 
201 {
202  // if nothing changed
203  if (m_w_cache.equals(w))
204  return;
205 
206  if (m_verbose)
207  SG_SPRINT("****** update m_w_cache!\n");
208 
209  ASSERT(w.size() == m_w_cache.size());
210  m_w_cache = w.clone();
211 
212  int32_t offset = 0;
213  for (int32_t fi = 0; fi < m_factor_types->get_num_elements(); ++fi)
214  {
215  CFactorType* ftype = dynamic_cast<CFactorType*>(m_factor_types->get_element(fi));
216  int32_t w_dim = ftype->get_w_dim();
217  offset += w_dim;
218  SGVector<float64_t> fw(w_dim);
220 
221  for (int32_t wi = 0; wi < w_dim; wi++)
222  fw[wi] = m_w_cache[fw_map[wi]];
223 
224  ftype->set_w(fw);
225  SG_UNREF(ftype);
226  }
227 
228  ASSERT(offset == m_w_cache.size());
229 }
230 
232 {
233  // factor graph instance
235  CFactorGraph* fg = mf->get_sample(feat_idx);
236 
237  // ground truth states
239  SGVector<int32_t> states = fg_states->get_data();
240 
241  // initialize psi
243  psi.zero();
244 
245  // construct unnormalized psi
246  CDynamicObjectArray* facs = fg->get_factors();
247  for (int32_t fi = 0; fi < facs->get_num_elements(); ++fi)
248  {
249  CFactor* fac = dynamic_cast<CFactor*>(facs->get_element(fi));
250  CTableFactorType* ftype = fac->get_factor_type();
251  int32_t id = ftype->get_type_id();
253 
254  ASSERT(w_map.size() == ftype->get_w_dim());
255 
256  SGVector<float64_t> dat = fac->get_data();
257  int32_t dat_size = dat.size();
258 
259  ASSERT(w_map.size() == dat_size * ftype->get_num_assignments());
260 
261  int32_t ei = ftype->index_from_universe_assignment(states, fac->get_variables());
262  for (int32_t di = 0; di < dat_size; di++)
263  psi[w_map[ei*dat_size + di]] += dat[di];
264 
265  SG_UNREF(ftype);
266  SG_UNREF(fac);
267  }
268 
269  // negation (-E(x,y) = <w,phi(x,y)>)
270  psi.scale(-1.0);
271 
272  SG_UNREF(facs);
273  SG_UNREF(fg);
274 
275  return psi;
276 }
277 
278 // E(x_i, y; w) - E(x_i, y_i; w) >= L(y_i, y) - xi_i
279 // xi_i >= max oracle
280 // max oracle := argmax_y { L(y_i, y) - E(x_i, y; w) + E(x_i, y_i; w) }
281 // := argmin_y { -L(y_i, y) + E(x_i, y; w) } - E(x_i, y_i; w)
282 // we do energy minimization in inference, so get back to max oracle value is:
283 // [ L(y_i, y_star) - E(x_i, y_star; w) ] + E(x_i, y_i; w)
284 CResultSet* CFactorGraphModel::argmax(SGVector<float64_t> w, int32_t feat_idx, bool const training)
285 {
286  // factor graph instance
288  CFactorGraph* fg = mf->get_sample(feat_idx);
289 
290  // prepare factor graph
291  fg->connect_components();
292  if (m_inf_type == TREE_MAX_PROD)
293  {
294  ASSERT(fg->is_tree_graph());
295  }
296 
297  if (m_verbose)
298  SG_SPRINT("\n------ example %d\n", feat_idx);
299 
300  // update factor parameters
301  w_to_fparams(w);
302  fg->compute_energies();
303 
304  if (m_verbose)
305  {
306  SG_SPRINT("energy table before loss-aug:\n");
307  fg->evaluate_energies();
308  }
309 
310  // prepare CResultSet
311  CResultSet* ret = new CResultSet();
312  SG_REF(ret);
313  ret->psi_computed = true;
314 
315  // y_truth
316  CFactorGraphObservation* y_truth =
318 
319  SGVector<int32_t> states_gt = y_truth->get_data();
320 
321  // E(x_i, y_i; w)
322  ret->psi_truth = get_joint_feature_vector(feat_idx, y_truth);
323  float64_t energy_gt = fg->evaluate_energy(states_gt);
324  ret->score = energy_gt;
325 
326  // - min_y [ E(x_i, y; w) - delta(y_i, y) ]
327  if (training)
328  {
329  fg->loss_augmentation(y_truth); // wrong assignments -delta()
330 
331  if (m_verbose)
332  {
333  SG_SPRINT("energy table after loss-aug:\n");
334  fg->evaluate_energies();
335  }
336  }
337 
338  CMAPInference infer_met(fg, m_inf_type);
339  infer_met.inference();
340 
341  // y_star
342  CFactorGraphObservation* y_star = infer_met.get_structured_outputs();
343  SGVector<int32_t> states_star = y_star->get_data();
344 
345  ret->argmax = y_star;
346  ret->psi_pred = get_joint_feature_vector(feat_idx, y_star);
347  float64_t l_energy_pred = fg->evaluate_energy(states_star);
348  ret->score -= l_energy_pred;
349  ret->delta = delta_loss(y_truth, y_star);
350 
351  if (m_verbose)
352  {
353  float64_t dot_pred = linalg::dot(w, ret->psi_pred);
354  float64_t dot_truth = linalg::dot(w, ret->psi_truth);
355  float64_t slack = dot_pred + ret->delta - dot_truth;
356 
357  SG_SPRINT("\n");
358  w.display_vector("w");
359 
360  ret->psi_pred.display_vector("psi_pred");
361  states_star.display_vector("state_pred");
362 
363  SG_SPRINT("dot_pred = %f, energy_pred = %f, delta = %f\n\n", dot_pred, l_energy_pred, ret->delta);
364 
365  ret->psi_truth.display_vector("psi_truth");
366  states_gt.display_vector("state_gt");
367 
368  SG_SPRINT("dot_truth = %f, energy_gt = %f\n\n", dot_truth, energy_gt);
369 
370  SG_SPRINT("slack = %f, score = %f\n\n", slack, ret->score);
371  }
372 
373  SG_UNREF(y_truth);
374  SG_UNREF(fg);
375 
376  return ret;
377 }
378 
380 {
383  SGVector<int32_t> s_truth = y_truth->get_data();
384  SGVector<int32_t> s_pred = y_pred->get_data();
385 
386  ASSERT(s_pred.size() == s_truth.size());
387 
388  float64_t loss = 0.0;
389  for (int32_t si = 0; si < s_pred.size(); si++)
390  {
391  if (s_pred[si] != s_truth[si])
392  loss += y_truth->get_loss_weights()[si];
393  }
394 
395  return loss;
396 }
397 
399 {
400 }
401 
403  float64_t regularization,
411 {
413  REQUIRE(m_factor_types != NULL, "%s::init_primal_opt(): no factor types!\n", get_name());
414 
415  int32_t dim_w = get_dim();
416 
417  switch (m_inf_type)
418  {
419  case GRAPH_CUT:
420  lb.resize_vector(dim_w);
421  ub.resize_vector(dim_w);
424 
425  for (int32_t fi = 0; fi < m_factor_types->get_num_elements(); ++fi)
426  {
427  CFactorType* ftype = dynamic_cast<CFactorType*>(m_factor_types->get_element(fi));
428  int32_t w_dim = ftype->get_w_dim();
429  SGVector<int32_t> card = ftype->get_cardinalities();
430 
431  // TODO: Features of pairwise factor are assume to be 1. Consider more general case, e.g., edge features are availabel.
432  // for pairwise factors with binary labels
433  if (card.size() == 2 && card[0] == 2 && card[1] == 2)
434  {
435  REQUIRE(w_dim == 4, "GraphCut doesn't support edge features currently.");
436  SGVector<float64_t> fw = ftype->get_w();
438  ASSERT(fw_map.size() == fw.size());
439 
440  // submodularity constrain
441  // E(0,1) + E(1,0) - E(0,0) + E(1,1) > 0
442  // For pairwise factors, data term = 1,
443  // energy table indeces are defined as follows:
444  // w[0]*1 = E(0, 0)
445  // w[1]*1 = E(1, 0)
446  // w[2]*1 = E(0, 1)
447  // w[3]*1 = E(1, 1)
448  // thus, w[2] + w[1] - w[0] - w[3] > 0
449  // since factor graph model is over-parametering,
450  // the constrain can be written as w[2] > 0, w[1] > 0, w[0] = 0, w[3] = 0
451  lb[fw_map[0]] = 0;
452  ub[fw_map[0]] = 0;
453  lb[fw_map[3]] = 0;
454  ub[fw_map[3]] = 0;
455  lb[fw_map[1]] = 0;
456  lb[fw_map[2]] = 0;
457  }
458  SG_UNREF(ftype);
459  }
460  break;
461  case TREE_MAX_PROD:
462  case LOOPY_MAX_PROD:
463  case LP_RELAXATION:
464  case TRWS_MAX_PROD:
465  case GEMPLP:
466  break;
467  }
468 }
SGVector< float64_t > psi_truth
Class CFactorType defines the way of factor parameterization.
Definition: FactorType.h:24
bool equals(SGVector< T > &other)
Definition: SGVector.cpp:395
SGVector< int32_t > get_params_mapping(const int32_t ftype_id)
SGVector< int32_t > get_global_params_mapping() const
Base class of the labels used in Structured Output (SO) problems.
float64_t evaluate_energy(const SGVector< int32_t > state) const
SGVector< int32_t > get_data() const
SGVector< float64_t > fparams_to_w()
CTableFactorType * get_factor_type() const
Definition: Factor.cpp:95
bool is_tree_graph() const
static const float64_t INFTY
infinity
Definition: Math.h:1868
const SGVector< int32_t > get_variables() const
Definition: Factor.cpp:107
SGVector< float64_t > get_loss_weights() const
virtual SGVector< float64_t > get_w()
Definition: FactorType.cpp:77
CDynamicObjectArray * m_factor_types
void add_factor_type(CFactorType *ftype)
CDynamicObjectArray * get_factors() const
#define REQUIRE(x,...)
Definition: SGIO.h:181
SGVector< float64_t > get_data() const
Definition: Factor.cpp:127
T dot(const SGVector< T > &a, const SGVector< T > &b)
CFactorGraph * get_sample(index_t idx)
virtual int32_t get_dim() const
virtual SGVector< float64_t > get_joint_feature_vector(int32_t feat_idx, CStructuredData *y)
virtual int32_t get_type_id() const
Definition: FactorType.cpp:67
#define SG_REF(x)
Definition: SGObject.h:52
void scale(T alpha)
Scale vector inplace.
Definition: SGVector.cpp:898
void set_w(SGVector< float64_t > w)
Definition: FactorType.cpp:87
void w_to_fparams(SGVector< float64_t > w)
#define SG_PRINT(...)
Definition: SGIO.h:136
#define SG_SPRINT(...)
Definition: SGIO.h:165
virtual const char * get_name() const
#define ASSERT(x)
Definition: SGIO.h:176
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:124
int32_t size() const
Definition: SGVector.h:156
double float64_t
Definition: common.h:60
Class CFactorGraphObservation is used as the structured output.
CDynamicObjectArray * get_factor_types() const
static CFactorGraphFeatures * obtain_from_generic(CFeatures *base_feats)
CFactorGraphFeatures maintains an array of factor graphs, each graph is a sample, i...
static SGMatrix< T > create_identity_matrix(index_t size, T scale)
virtual CResultSet * argmax(SGVector< float64_t > w, int32_t feat_idx, bool const training=true)
Dynamic array class for CSGObject pointers that creates an array that can be used like a list or an a...
SGVector< T > clone() const
Definition: SGVector.cpp:262
static void fill_vector(T *vec, int32_t len, T value)
Definition: SGVector.cpp:279
static CFactorGraphObservation * obtain_from_generic(CStructuredData *base_data)
SGVector< float64_t > evaluate_energies() const
virtual void inference()
Class CMAPInference performs MAP inference on a factor graph. Briefly, given a factor graph model...
Definition: MAPInference.h:46
Class CStructuredModel that represents the application specific model and contains most of the applic...
#define SG_UNREF(x)
Definition: SGObject.h:53
CStructuredLabels * m_labels
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
std::unordered_map< int32_t, int32_t > factor_counts_type
virtual int32_t get_w_dim() const
Definition: FactorType.cpp:92
void del_factor_type(const int32_t ftype_id)
Class CFactorGraph a factor graph is a structured input in general.
Definition: FactorGraph.h:27
The class Features is the base class of all feature objects.
Definition: Features.h:69
SGVector< int32_t > m_w_map
Class CTableFactorType the way that store assignments of variables and energies in a table or a multi...
Definition: FactorType.h:122
CStructuredData * argmax
virtual const SGVector< int32_t > get_cardinalities() const
Definition: FactorType.cpp:97
SGVector< index_t > find(T elem)
Definition: SGVector.cpp:863
virtual CStructuredData * get_label(int32_t idx)
SGVector< float64_t > psi_pred
CSGObject * get_element(int32_t index) const
virtual float64_t delta_loss(CStructuredData *y1, CStructuredData *y2)
SGVector< float64_t > m_w_cache
virtual void loss_augmentation(CFactorGraphObservation *gt)
CFactorGraphObservation * get_structured_outputs() const
void resize_vector(int32_t n)
Definition: SGVector.cpp:301
#define SG_ADD(...)
Definition: SGObject.h:93
void display_vector(const char *name="vector", const char *prefix="") const
Definition: SGVector.cpp:411
int32_t index_from_universe_assignment(const SGVector< int32_t > assig, const SGVector< int32_t > var_index) const
Definition: FactorType.cpp:190
virtual void init_primal_opt(float64_t regularization, SGMatrix< float64_t > &A, SGVector< float64_t > a, SGMatrix< float64_t > B, SGVector< float64_t > &b, SGVector< float64_t > &lb, SGVector< float64_t > &ub, SGMatrix< float64_t > &C)
Class CFactor A factor is defined on a clique in the factor graph. Each factor can have its own data...
Definition: Factor.h:89
Base class of the components of StructuredLabels.
index_t vlen
Definition: SGVector.h:571
CFactorType * get_factor_type(const int32_t ftype_id) const
virtual int32_t get_num_assignments() const
Definition: FactorType.cpp:122

SHOGUN Machine Learning Toolbox - Documentation