SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
KMeans.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Gunnar Raetsch
8  * Written (W) 2007-2009 Soeren Sonnenburg
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #ifndef _KMEANS_H__
13 #define _KMEANS_H__
14 
15 #include <shogun/lib/config.h>
16 
17 #include <stdio.h>
18 #include <shogun/lib/common.h>
19 #include <shogun/io/SGIO.h>
23 
24 namespace shogun
25 {
// Forward declaration of the base class used by CKMeans below.
// NOTE(review): a forward declaration alone is not enough to derive from a
// class (the base must be a complete type at that point). Listing lines 20-22
// are absent from this page, so the include that defines CDistanceMachine is
// presumably among them -- confirm against the real header.
26 class CDistanceMachine;
27 
30 {
33 };
34 
// KMeans clustering: partitions the data into k (a-priori specified) clusters.
// Publicly derives from CDistanceMachine.
//
// NOTE(review): this text is a Doxygen source listing. The original doc
// comments are gone and some numbered lines carry no text, so several
// declarations are not visible here. Comments below describe only what the
// visible signatures show; anything inferred from a name is marked as such.
50 class CKMeans : public CDistanceMachine
51 {
52  public:
    // Default constructor (takes no arguments).
54  CKMeans();
55 
    // Construct from k, a distance object d and a training method f.
    // All three arguments are required in this overload.
    // presumably k is the number of clusters -- confirm.
62  CKMeans(int32_t k, CDistance* d, EKMeansMethod f);
63 
    // As above with an additional kmeanspp flag. kmeanspp defaults to false
    // and f defaults to KMM_LLOYD, so a two-argument call CKMeans(k, d)
    // resolves to this overload.
71  CKMeans(int32_t k, CDistance* d, bool kmeanspp=false, EKMeansMethod f=KMM_LLOYD);
72 
    // Overload that additionally receives a matrix centers_i (by value);
    // f defaults to KMM_LLOYD.
    // presumably centers_i supplies the initial cluster centers -- confirm.
79  CKMeans(int32_t k_i, CDistance* d_i, SGMatrix<float64_t> centers_i, EKMeansMethod f=KMM_LLOYD);
    // Virtual destructor.
80  virtual ~CKMeans();
81 
    // NOTE(review): listing lines 82-90 have no text on this page. The page
    // index places get_classifier_type() at KMeans.h:89 and also references
    // MACHINE_PROBLEM_TYPE(PT); presumably both belong in this gap -- confirm.
82 
84 
85 
90 
    // Virtual; takes a C FILE* and returns bool.
    // presumably reads the model from srcfile and reports success -- confirm.
96  virtual bool load(FILE* srcfile);
97 
    // Virtual; takes a C FILE* and returns bool.
    // presumably writes the model to dstfile and reports success -- confirm.
103  virtual bool save(FILE* dstfile);
104 
    // Setter taking the new k as p_k.
109  void set_k(int32_t p_k);
110 
    // Getter returning k as int32_t. Not const-qualified.
115  int32_t get_k();
116 
    // Setter for the kmeans++ flag (argument kmpp).
121  void set_use_kmeanspp(bool kmpp);
122 
    // Const getter for the kmeans++ flag.
127  bool get_use_kmeanspp() const;
128 
    // Setter for the fixed-centers flag.
133  void set_fixed_centers(bool fixed);
134 
    // Getter for the fixed-centers flag. Not const-qualified.
139  bool get_fixed_centers();
140 
    // Setter taking the iteration limit as int32_t.
145  void set_max_iter(int32_t iter);
146 
    // NOTE(review): listing lines 152, 158 and 164 have no text. The page
    // index lists get_max_iter(), get_radiuses() and get_cluster_centers()
    // (defined in KMeans.cpp) with no visible declaration; presumably they
    // are declared here -- confirm.
152 
158 
164 
    // Getter returning an int32_t. Not const-qualified.
169  int32_t get_dimensions();
170 
    // Returns the string literal "KMeans".
172  virtual const char* get_name() const { return "KMeans"; }
173 
    // Virtual setter taking a centers matrix by value. A private overload
    // with a different parameter list is declared further down.
178  virtual void set_initial_centers(SGMatrix<float64_t> centers);
179 
    // NOTE(review): listing lines 185 and 191 have no text. The page index
    // lists set_train_method(EKMeansMethod) and get_train_method() const;
    // presumably they are declared here -- confirm.
185 
191 
    // Setter taking b.
    // presumably b is the mini-batch size (stored in batch_size) -- confirm.
196  void set_mbKMeans_batch_size(int32_t b);
197 
    // Const getter returning an int32_t.
202  int32_t get_mbKMeans_batch_size() const;
203 
    // Setter taking t.
    // presumably t is the mini-batch iteration count (minib_iter) -- confirm.
208  void set_mbKMeans_iter(int32_t t);
209 
    // Const getter returning an int32_t.
214  int32_t get_mbKMeans_iter() const;
215 
    // Sets both values in one call; parameters are named b and t like the
    // two single-value setters above.
221  void set_mbKMeans_params(int32_t b, int32_t t);
222 
223  private:
    // Virtual training hook; data defaults to NULL. Returns bool.
232  virtual bool train_machine(CFeatures* data=NULL);
233 
    // Virtual hook with no arguments and no return value.
235  virtual void store_model_features();
236 
    // Always returns false.
237  virtual bool train_require_labels() const { return false; }
238 
    // Takes no arguments and returns an SGMatrix<float64_t>.
    // presumably the kmeans++ choice of initial centers -- confirm.
243  SGMatrix<float64_t> kmeanspp();
    // Takes no arguments.
    // presumably shared member initialisation for the constructors -- confirm.
244  void init();
245 
    // Helper taking a weight vector, a vector named ClList and a count XSize.
250  void set_random_centers(SGVector<float64_t> weights_set, SGVector<int32_t> ClList, int32_t XSize);
    // Private overload of set_initial_centers with the same parameter list as
    // set_random_centers; distinct from the public SGMatrix overload.
251  void set_initial_centers(SGVector<float64_t> weights_set,
252  SGVector<int32_t> ClList, int32_t XSize);
    // Takes no arguments and returns nothing.
253  void compute_cluster_variances();
254 
    // NOTE(review): access is already private since listing line 223, so this
    // second specifier is redundant; it only separates methods from data.
255  private:
    // NOTE(review): the page index lists the getter as
    // `float64_t get_max_iter()`, which does not match this int32_t member
    // or the int32_t setter -- confirm and consider aligning the types.
257  int32_t max_iter;
258 
    // Flag accessed through set_fixed_centers()/get_fixed_centers(), judging
    // by the names.
260  bool fixed_centers;
261 
    // The k of the algorithm (see set_k()/get_k()).
263  int32_t k;
264 
    // presumably the value returned by get_dimensions() -- confirm.
266  int32_t dimensions;
267 
    // presumably the per-cluster radiuses returned by get_radiuses() -- confirm.
269  SGVector<float64_t> R;
270 
    // presumably the user-supplied initial centers -- confirm.
272  SGMatrix<float64_t> mus_initial;
273 
    // Flag accessed through set_use_kmeanspp()/get_use_kmeanspp(), judging by
    // the names.
275  bool use_kmeanspp;
276 
    // presumably the mini-batch size (see set_mbKMeans_batch_size) -- confirm.
278  int32_t batch_size;
279 
    // presumably the mini-batch iteration count (see set_mbKMeans_iter) -- confirm.
281  int32_t minib_iter;
282 
    // presumably the current cluster centers -- confirm.
284  SGMatrix<float64_t> mus;
285 
    // Training method; type EKMeansMethod, as passed to the constructors.
287  EKMeansMethod train_method;
288 };
289 }
290 #endif
291 
int32_t get_mbKMeans_batch_size() const
Definition: KMeans.cpp:341
virtual const char * get_name() const
Definition: KMeans.h:172
EMachineType
Definition: Machine.h:33
int32_t get_mbKMeans_iter() const
Definition: KMeans.cpp:352
virtual bool save(FILE *dstfile)
Definition: KMeans.cpp:286
void set_mbKMeans_params(int32_t b, int32_t t)
Definition: KMeans.cpp:357
Class Distance, a base class for all the distances used in the Shogun toolbox.
Definition: Distance.h:83
void set_mbKMeans_batch_size(int32_t b)
Definition: KMeans.cpp:335
void set_mbKMeans_iter(int32_t t)
Definition: KMeans.cpp:346
EKMeansMethod
Definition: KMeans.h:29
void set_use_kmeanspp(bool kmpp)
Definition: KMeans.cpp:293
void set_k(int32_t p_k)
Definition: KMeans.cpp:303
int32_t get_dimensions()
Definition: KMeans.cpp:382
A generic DistanceMachine interface.
virtual ~CKMeans()
Definition: KMeans.cpp:64
bool get_use_kmeanspp() const
Definition: KMeans.cpp:298
SGVector< float64_t > get_radiuses()
Definition: KMeans.cpp:365
KMeans clustering partitions the data into k (a-priori specified) clusters.
Definition: KMeans.h:50
#define MACHINE_PROBLEM_TYPE(PT)
Definition: Machine.h:116
float64_t get_max_iter()
Definition: KMeans.cpp:320
double float64_t
Definition: common.h:50
virtual bool load(FILE *srcfile)
Definition: KMeans.cpp:279
bool get_fixed_centers()
Definition: KMeans.cpp:392
void set_max_iter(int32_t iter)
Definition: KMeans.cpp:314
void set_fixed_centers(bool fixed)
Definition: KMeans.cpp:387
virtual void set_initial_centers(SGMatrix< float64_t > centers)
Definition: KMeans.cpp:68
virtual EMachineType get_classifier_type()
Definition: KMeans.h:89
The class Features is the base class of all feature objects.
Definition: Features.h:68
void set_train_method(EKMeansMethod f)
Definition: KMeans.cpp:325
EKMeansMethod get_train_method() const
Definition: KMeans.cpp:330
int32_t get_k()
Definition: KMeans.cpp:309
SGMatrix< float64_t > get_cluster_centers()
Definition: KMeans.cpp:370

SHOGUN Machine Learning Toolbox - Documentation