SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Gaussian.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Alesis Novik
8  * Written (W) 2014 Parijat Mazumdar
9  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
10  */
11 #include <shogun/lib/config.h>
12 
13 #ifdef HAVE_LAPACK
14 
17 #include <shogun/base/Parameter.h>
18 
19 using namespace shogun;
20 
21 CGaussian::CGaussian() : CDistribution(), m_constant(0), m_d(), m_u(), m_mean(), m_cov_type(FULL)
22 {
23  register_params();
24 }
25 
27  ECovType cov_type) : CDistribution(), m_d(), m_u(), m_cov_type(cov_type)
28 {
29  ASSERT(mean.vlen==cov.num_rows)
30  ASSERT(cov.num_rows==cov.num_cols)
31 
32  m_mean=mean;
33 
34  if (cov.num_rows==1)
36 
37  decompose_cov(cov);
38  init();
39  register_params();
40 }
41 
43 {
45  switch (m_cov_type)
46  {
47  case FULL:
48  case DIAG:
49  for (int32_t i=0; i<m_d.vlen; i++)
51  break;
52  case SPHERICAL:
54  break;
55  }
56 }
57 
59 {
60 }
61 
63 {
64  // init features with data if necessary and assure type is correct
65  if (data)
66  {
67  if (!data->has_property(FP_DOT))
68  SG_ERROR("Specified features are not of type CDotFeatures\n")
69  set_features(data);
70  }
71 
72  CDotFeatures* dotdata=(CDotFeatures *) data;
73  m_mean=dotdata->get_mean();
74  SGMatrix<float64_t> cov=dotdata->get_cov();
75  decompose_cov(cov);
76  init();
77  return true;
78 }
79 
81 {
82  switch (m_cov_type)
83  {
84  case FULL:
86  case DIAG:
87  return m_d.vlen+m_mean.vlen;
88  case SPHERICAL:
89  return 1+m_mean.vlen;
90  }
91  return 0;
92 }
93 
95 {
97  return 0;
98 }
99 
100 float64_t CGaussian::get_log_derivative(int32_t num_param, int32_t num_example)
101 {
103  return 0;
104 }
105 
107 {
109  SGVector<float64_t> v=((CDotFeatures *)features)->get_computed_dot_feature_vector(num_example);
110  float64_t answer=compute_log_PDF(v);
111  return answer;
112 }
113 
115 {
116  SG_DEBUG("To be implemented")
117  // Supplied with the belongingness values this will compute the updated mean and co-variance matrix
118 }
119 
121 {
123  ASSERT(point.vlen == m_mean.vlen)
124  float64_t* difference=SG_MALLOC(float64_t, m_mean.vlen);
125  memcpy(difference, point.vector, sizeof(float64_t)*m_mean.vlen);
126 
127  for (int32_t i = 0; i < m_mean.vlen; i++)
128  difference[i] -= m_mean.vector[i];
129 
130  float64_t answer=m_constant;
131 
132  if (m_cov_type==FULL)
133  {
134  float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen);
135  cblas_dgemv(CblasRowMajor, CblasNoTrans, m_d.vlen, m_d.vlen,
136  1, m_u.matrix, m_d.vlen, difference, 1, 0, temp_holder, 1);
137 
138  for (int32_t i=0; i<m_d.vlen; i++)
139  answer+=temp_holder[i]*temp_holder[i]/m_d.vector[i];
140 
141  SG_FREE(temp_holder);
142  }
143  else if (m_cov_type==DIAG)
144  {
145  for (int32_t i=0; i<m_mean.vlen; i++)
146  answer+=difference[i]*difference[i]/m_d.vector[i];
147  }
148  else
149  {
150  for (int32_t i=0; i<m_mean.vlen; i++)
151  answer+=difference[i]*difference[i]/m_d.vector[0];
152  }
153 
154  SG_FREE(difference);
155 
156  return -0.5*answer;
157 }
158 
160 {
161  return m_mean;
162 }
163 
165 {
166  if (mean.vlen==1)
168 
169  m_mean=mean;
170 }
171 
173 {
174  ASSERT(cov.num_rows==cov.num_cols)
175  ASSERT(cov.num_rows==m_mean.vlen)
176  decompose_cov(cov);
177  init();
178 }
179 
181 {
182  m_d = d;
183  init();
184 }
185 
187 {
188  float64_t* cov=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen);
189  memset(cov, 0, sizeof(float64_t)*m_mean.vlen*m_mean.vlen);
190 
191  if (m_cov_type==FULL)
192  {
193  if (!m_u.matrix)
194  SG_ERROR("Unitary matrix not set\n")
195 
196  float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
197  float64_t* diag_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
198  memset(diag_holder, 0, sizeof(float64_t)*m_d.vlen*m_d.vlen);
199  for(int32_t i=0; i<m_d.vlen; i++)
200  diag_holder[i*m_d.vlen+i]=m_d.vector[i];
201 
202  cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
204  diag_holder, m_d.vlen, 0, temp_holder, m_d.vlen);
205  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
206  m_d.vlen, m_d.vlen, m_d.vlen, 1, temp_holder, m_d.vlen,
207  m_u.matrix, m_d.vlen, 0, cov, m_d.vlen);
208 
209  SG_FREE(diag_holder);
210  SG_FREE(temp_holder);
211  }
212  else if (m_cov_type==DIAG)
213  {
214  for (int32_t i=0; i<m_d.vlen; i++)
215  cov[i*m_d.vlen+i]=m_d.vector[i];
216  }
217  else
218  {
219  for (int32_t i=0; i<m_mean.vlen; i++)
220  cov[i*m_mean.vlen+i]=m_d.vector[0];
221  }
222  return SGMatrix<float64_t>(cov, m_mean.vlen, m_mean.vlen, false);//fix needed
223 }
224 
225 void CGaussian::register_params()
226 {
227  m_parameters->add(&m_u, "m_u", "Unitary matrix.");
228  m_parameters->add(&m_d, "m_d", "Diagonal.");
229  m_parameters->add(&m_mean, "m_mean", "Mean.");
230  m_parameters->add(&m_constant, "m_constant", "Constant part.");
231  m_parameters->add((machine_int_t*)&m_cov_type, "m_cov_type", "Covariance type.");
232 }
233 
234 void CGaussian::decompose_cov(SGMatrix<float64_t> cov)
235 {
236  switch (m_cov_type)
237  {
238  case FULL:
240  memcpy(m_u.matrix, cov.matrix, sizeof(float64_t)*cov.num_rows*cov.num_rows);
241 
243  m_d.vlen=cov.num_rows;
244  m_u.num_rows=cov.num_rows;
245  m_u.num_cols=cov.num_rows;
246  break;
247  case DIAG:
248  m_d.vector=SG_MALLOC(float64_t, cov.num_rows);
249 
250  for (int32_t i=0; i<cov.num_rows; i++)
251  m_d.vector[i]=cov.matrix[i*cov.num_rows+i];
252 
253  m_d.vlen=cov.num_rows;
254  break;
255  case SPHERICAL:
256  m_d.vector=SG_MALLOC(float64_t, 1);
257 
258  m_d.vector[0]=cov.matrix[0];
259  m_d.vlen=1;
260  break;
261  }
262 }
263 
265 {
266  SG_DEBUG("Entering\n");
267  float64_t* r_matrix=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen);
268  memset(r_matrix, 0, m_mean.vlen*m_mean.vlen*sizeof(float64_t));
269 
270  switch (m_cov_type)
271  {
272  case FULL:
273  case DIAG:
274  for (int32_t i=0; i<m_mean.vlen; i++)
275  r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[i]);
276 
277  break;
278  case SPHERICAL:
279  for (int32_t i=0; i<m_mean.vlen; i++)
280  r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[0]);
281 
282  break;
283  }
284 
285  float64_t* random_vec=SG_MALLOC(float64_t, m_mean.vlen);
286 
287  for (int32_t i=0; i<m_mean.vlen; i++)
288  random_vec[i]=CMath::randn_double();
289 
290  if (m_cov_type==FULL)
291  {
292  float64_t* temp_matrix=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
293  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
295  r_matrix, m_d.vlen, 0, temp_matrix, m_d.vlen);
296  SG_FREE(r_matrix);
297  r_matrix=temp_matrix;
298  }
299 
300  float64_t* samp=SG_MALLOC(float64_t, m_mean.vlen);
301 
302  cblas_dgemv(CblasRowMajor, CblasNoTrans, m_mean.vlen, m_mean.vlen,
303  1, r_matrix, m_mean.vlen, random_vec, 1, 0, samp, 1);
304 
305  for (int32_t i=0; i<m_mean.vlen; i++)
306  samp[i]+=m_mean.vector[i];
307 
308  SG_FREE(random_vec);
309  SG_FREE(r_matrix);
310 
311  SG_DEBUG("Leaving\n");
312  return SGVector<float64_t>(samp, m_mean.vlen, false);//fix needed
313 }
314 
316 {
317  if (!distribution)
318  return NULL;
319 
320  CGaussian* casted=dynamic_cast<CGaussian*>(distribution);
321  if (!casted)
322  return NULL;
323 
324  /* since an additional reference is returned */
325  SG_REF(casted);
326  return casted;
327 }
328 
329 #endif // HAVE_LAPACK
SGVector< float64_t > sample()
Definition: Gaussian.cpp:264
float64_t m_constant
Definition: Gaussian.h:238
virtual void set_features(CFeatures *f)
Definition: Distribution.h:160
Gaussian distribution interface.
Definition: Gaussian.h:50
virtual bool train(CFeatures *data=NULL)
Definition: Gaussian.cpp:62
static float64_t randn_double()
Definition: Math.h:692
#define SG_ERROR(...)
Definition: SGIO.h:130
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:140
Parameter * m_parameters
Definition: SGObject.h:470
virtual float64_t compute_log_PDF(SGVector< float64_t > point)
Definition: Gaussian.cpp:120
Base class Distribution from which all methods implementing a distribution are derived.
Definition: Distribution.h:44
ECovType m_cov_type
Definition: Gaussian.h:246
Features that support dot products among other operations.
Definition: DotFeatures.h:44
full covariance
Definition: Gaussian.h:36
spherical covariance
Definition: Gaussian.h:40
virtual void update_params_em(SGVector< float64_t > alpha_k)
Definition: Gaussian.cpp:114
virtual SGVector< float64_t > get_mean()
void add(bool *param, const char *name, const char *description="")
Definition: Parameter.cpp:37
SGMatrix< float64_t > m_u
Definition: Gaussian.h:242
#define ASSERT(x)
Definition: SGIO.h:202
virtual SGVector< float64_t > get_mean()
Definition: Gaussian.cpp:159
static SGVector< float64_t > compute_eigenvectors(SGMatrix< float64_t > matrix)
Definition: SGMatrix.cpp:851
virtual float64_t get_log_model_parameter(int32_t num_param)
Definition: Gaussian.cpp:94
static CGaussian * obtain_from_generic(CDistribution *distribution)
Definition: Gaussian.cpp:315
virtual void set_cov(SGMatrix< float64_t > cov)
Definition: Gaussian.cpp:172
double float64_t
Definition: common.h:50
ECovType
Definition: Gaussian.h:33
#define SG_REF(x)
Definition: SGRefObject.h:34
SGVector< float64_t > m_mean
Definition: Gaussian.h:244
index_t num_rows
Definition: SGMatrix.h:298
virtual SGMatrix< float64_t > get_cov()
Definition: Gaussian.cpp:186
#define M_PI
workaround for log2 being a define on cygwin
Definition: Math.h:58
index_t num_cols
Definition: SGMatrix.h:300
virtual ~CGaussian()
Definition: Gaussian.cpp:58
diagonal covariance
Definition: Gaussian.h:38
virtual float64_t get_log_likelihood_example(int32_t num_example)
Definition: Gaussian.cpp:106
#define SG_DEBUG(...)
Definition: SGIO.h:108
int machine_int_t
Definition: common.h:59
The class Features is the base class of all feature objects.
Definition: Features.h:68
static float64_t log(float64_t v)
Definition: Math.h:505
virtual void set_mean(const SGVector< float64_t > mean)
Definition: Gaussian.cpp:164
static float32_t sqrt(float32_t x)
x^0.5
Definition: Math.h:330
bool has_property(EFeatureProperty p) const
Definition: Features.cpp:292
virtual int32_t get_num_model_parameters()
Definition: Gaussian.cpp:80
index_t vlen
Definition: SGVector.h:707
virtual SGMatrix< float64_t > get_cov()
virtual float64_t get_log_derivative(int32_t num_param, int32_t num_example)
Definition: Gaussian.cpp:100
void set_d(const SGVector< float64_t > d)
Definition: Gaussian.cpp:180
SGVector< float64_t > m_d
Definition: Gaussian.h:240

SHOGUN Machine Learning Toolbox - Documentation