SHOGUN  6.1.3
DataManager.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 - 2017 Soumyajit De
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  */
30 
31 #ifndef DATA_MANAGER_H__
32 #define DATA_MANAGER_H__
33 
34 #include <vector>
35 #include <memory>
37 #include <shogun/lib/common.h>
38 
39 namespace shogun
40 {
41 
42 class CFeatures;
43 
44 namespace internal
45 {
46 
47 class DataFetcher;
48 class NextSamples;
49 
// Class DataManager for fetching/streaming test data block-wise (summary taken
// from the documentation index at the end of this page).
// NOTE(review): the `class DataManager` declaration line (DataManager.h:63
// according to that index) does not appear in this listing; the body below
// starts directly at the opening brace.
64 {
65 public:
   // Constructs a manager from a distribution count.
   // NOTE(review): presumably one DataFetcher is kept per distribution -
   // confirm in DataManager.cpp.
71  DataManager(index_t num_distributions);
72 
   // Copy construction is disabled.
77  DataManager(const DataManager& other) = delete;
78 
   // Copy assignment is disabled as well, so the class is non-copyable.
83  DataManager& operator=(const DataManager& other) = delete;
84 
   // User-declared destructor; defined out of line.
88  ~DataManager();
89 
   // Sets the blocksize. NOTE(review): units and how the value is split
   // across distributions are not visible here - confirm in DataManager.cpp.
101  void set_blocksize(index_t blocksize);
102 
   // Sets the number of blocks per burst. NOTE(review): exact burst
   // semantics are not visible in this header - confirm in DataManager.cpp.
109  void set_num_blocks_per_burst(index_t num_blocks_per_burst);
110 
   // Non-const overload: returns an InitPerFeature by value for index i.
   // NOTE(review): InitPerFeature is neither declared nor included in the
   // visible listing (source line 36 is absent) - presumably it comes from a
   // dropped #include; confirm against the original header.
133  InitPerFeature samples_at(index_t i);
134 
   // Const overload: returns a CFeatures pointer for index i.
   // NOTE(review): ownership / reference counting of the returned pointer is
   // not visible here - confirm before storing it.
141  CFeatures* samples_at(index_t i) const;
142 
   // NOTE(review): source line 162 is blank in this listing, while the
   // documentation index mentions `index_t & num_samples_at(index_t i)`, which
   // has no visible declaration - presumably that declaration was dropped
   // here; confirm against the original header.
162 
   // Const overload: returns an index_t by value for index i. The top-level
   // const on the by-value return type has no effect for callers.
169  const index_t num_samples_at(index_t i) const;
170 
   // Returns an index_t by value for index i (blocksize of that entry,
   // judging by the name - TODO confirm).
177  const index_t blocksize_at(index_t i) const;
178 
   // Returns a sample count. NOTE(review): presumably aggregated over all
   // distributions - confirm in DataManager.cpp.
182  index_t get_num_samples() const;
183 
   // Returns the minimum blocksize. NOTE(review): how the minimum is derived
   // is not visible in this header.
190  index_t get_min_blocksize() const;
   // Everything up to the matching #endif is compiled only when
   // DOXYGEN_SHOULD_SKIP_THIS is not defined, i.e. it is hidden from the
   // generated documentation.
191 #ifndef DOXYGEN_SHOULD_SKIP_THIS
   // Setter / getter pair for the blockwise flag. No member backing this flag
   // is visible in this class - presumably it is kept elsewhere; TODO confirm.
192  void set_blockwise(bool blockwise);
193  const bool is_blockwise() const;
194 
   // Setter / getter pair for train/test mode (see train_test_mode below).
195  void set_train_test_mode(bool on);
196  bool is_train_test_mode() const;
197 
   // Setter / getter pair for train mode (see train_mode below).
198  void set_train_mode(bool on);
199  bool is_train_mode() const;
200 
   // Setter / getter pair for cross-validation mode
   // (see cross_validation_mode below).
201  void set_cross_validation_mode(bool on);
202  bool is_cross_validation_mode() const;
203 
   // Setter / getter pair for the train/test ratio
   // (see train_test_ratio below).
204  void set_train_test_ratio(float64_t ratio);
205  float64_t get_train_test_ratio() const;
206 
   // Returns the number of folds. NOTE(review): no fold-count member is
   // visible in this class, so the value is presumably derived - confirm.
207  index_t get_num_folds() const;
208 
   // Shuffle / unshuffle the features. NOTE(review): the member comment on
   // cross_validation_mode mentions a "shuffle subset"; whether these calls
   // add and remove that subset should be confirmed in DataManager.cpp.
209  void shuffle_features();
210  void unshuffle_features();
211 
   // Fold selection and active-subset initialisation; behaviour is defined in
   // DataManager.cpp and not visible here.
212  void use_fold(index_t i);
213  void init_active_subset();
214 
   // Streaming interface: start(), then next() (which returns a NextSamples
   // by value), then end(); reset() is also provided. NOTE(review): the call
   // order is inferred from the names and from the member comments below,
   // which mention subsets being used in start() and removed in end().
215  void start();
216  NextSamples next();
217  void end();
218  void reset();
219 #endif // DOXYGEN_SHOULD_SKIP_THIS
220 private:
   // Fetchers held through std::unique_ptr, so this vector owns them.
221  std::vector<std::unique_ptr<DataFetcher> > fetchers;
222 
223  bool train_test_mode; // When on, the train/test/fold subset is used (in start()); these subsets are removed in the end() method.
224  bool cross_validation_mode; // When on, the shuffle subset is used; it is removed after train_test mode in end().
225  bool train_mode; // Used only when train/test mode or cross-validation mode is on.
   // Train/test ratio; read and written through get_/set_train_test_ratio().
226  float64_t train_test_ratio;
227 
   // Compile-time default values. NOTE(review): presumably used to initialise
   // the corresponding members above - confirm in DataManager.cpp.
228  constexpr static bool default_train_test_mode=false;
229  constexpr static bool default_train_mode=false;
230  constexpr static bool default_cross_validation_mode=false;
231  constexpr static float64_t default_train_test_ratio=1.0;
232 };
233 
234 }
235 
236 }
237 
238 #endif // DATA_MANAGER_H__
index_t get_num_samples() const
Definition: DataManager.cpp:59
DataManager(index_t num_distributions)
Definition: DataManager.cpp:43
index_t & num_samples_at(index_t i)
int32_t index_t
Definition: common.h:72
void set_blocksize(index_t blocksize)
Definition: DataManager.cpp:91
InitPerFeature samples_at(index_t i)
double float64_t
Definition: common.h:60
void set_num_blocks_per_burst(index_t num_blocks_per_burst)
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
DataManager & operator=(const DataManager &other)=delete
The class Features is the base class of all feature objects.
Definition: Features.h:69
Class DataManager for fetching/streaming test data block-wise. It can handle data coming from multipl...
Definition: DataManager.h:63
index_t get_min_blocksize() const
Definition: DataManager.cpp:72
class NextSamples is the return type for next() call in DataManager. If there are no more samples (fr...
Definition: NextSamples.h:68
const index_t blocksize_at(index_t i) const

SHOGUN Machine Learning Toolbox - Documentation