SparseVGInferenceMethod.cpp
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (W) 2015 Wu Lin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 * This code is specifically adapted from the functions in varsgpLikelihood.m and
 * varsgpPredict.m.
 *
 * The reference paper is
 * Titsias, Michalis K.
 * "Variational learning of inducing variables in sparse Gaussian processes."
 * International Conference on Artificial Intelligence and Statistics. 2009.
 */
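
/*
 * Quick orientation (a sketch following the paper above, not authoritative
 * Shogun documentation): with n training inputs, m inducing inputs,
 * Qnn = Knm*inv(Kmm)*Kmn and Gaussian noise variance sigma2, Titsias'
 * collapsed variational lower bound on the log marginal likelihood is
 *
 *   F_V = log N(y | mean, sigma2*I + Qnn) - (0.5/sigma2)*trace(Knn - Qnn)
 *
 * The methods below compute -F_V and its gradients with respect to the
 * kernel, likelihood, mean and inducing-point parameters.
 */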

#include <shogun/machine/gp/SparseVGInferenceMethod.h>

#ifdef HAVE_EIGEN3

#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

CSparseVGInferenceMethod::CSparseVGInferenceMethod() : CSingleSparseInferenceBase()
{
    init();
}

CSparseVGInferenceMethod::CSparseVGInferenceMethod(CKernel* kern, CFeatures* feat,
    CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
    : CSingleSparseInferenceBase(kern, feat, m, lab, mod, lat)
{
    init();
}

void CSparseVGInferenceMethod::init()
{
    m_yy=0.0;
    m_f3=0.0;
    m_sigma2=0.0;
    m_trk=0.0;
    m_Tmm=SGMatrix<float64_t>();
    m_Tnm=SGMatrix<float64_t>();
    m_inv_Lm=SGMatrix<float64_t>();
    m_inv_La=SGMatrix<float64_t>();
    m_Knm_inv_Lm=SGMatrix<float64_t>();

    SG_ADD(&m_yy, "yy", "yy", MS_NOT_AVAILABLE);
    SG_ADD(&m_f3, "f3", "f3", MS_NOT_AVAILABLE);
    SG_ADD(&m_sigma2, "sigma2", "sigma2", MS_NOT_AVAILABLE);
    SG_ADD(&m_trk, "trk", "trk", MS_NOT_AVAILABLE);
    SG_ADD(&m_Tmm, "Tmm", "Tmm", MS_NOT_AVAILABLE);
    SG_ADD(&m_Tnm, "Tnm", "Tnm", MS_NOT_AVAILABLE);
    SG_ADD(&m_inv_Lm, "inv_Lm", "inv_Lm", MS_NOT_AVAILABLE);
    SG_ADD(&m_inv_La, "inv_La", "inv_La", MS_NOT_AVAILABLE);
    SG_ADD(&m_Knm_inv_Lm, "Knm_Inv_Lm", "Knm_Inv_Lm", MS_NOT_AVAILABLE);
}

CSparseVGInferenceMethod::~CSparseVGInferenceMethod()
{
}

void CSparseVGInferenceMethod::compute_gradient()
{
    CInferenceMethod::compute_gradient();

    if (!m_gradient_update)
    {
        update_deriv();
        m_gradient_update=true;
        update_parameter_hash();
    }
}

void CSparseVGInferenceMethod::update()
{
    SG_DEBUG("entering\n");

    CInferenceMethod::update();
    update_chol();
    update_alpha();
    m_gradient_update=false;
    update_parameter_hash();

    SG_DEBUG("leaving\n");
}

CSparseVGInferenceMethod* CSparseVGInferenceMethod::obtain_from_generic(
    CInferenceMethod* inference)
{
    if (inference==NULL)
        return NULL;

    if (inference->get_inference_type()!=INF_SPARSEVG)
        SG_SERROR("Provided inference is not of type CSparseVGInferenceMethod!\n")

    SG_REF(inference);
    return (CSparseVGInferenceMethod*)inference;
}

void CSparseVGInferenceMethod::check_members() const
{
    CSingleSparseInferenceBase::check_members();

    REQUIRE(m_model->get_model_type()==LT_GAUSSIAN,
        "SparseVG inference method can only use Gaussian likelihood function\n")
    REQUIRE(m_labels->get_label_type()==LT_REGRESSION, "Labels must be type "
        "of CRegressionLabels\n")
}

SGVector<float64_t> CSparseVGInferenceMethod::get_diagonal_vector()
{
    SG_NOTIMPLEMENTED
    //the inference method does not need to use this
    return SGVector<float64_t>();
}

float64_t CSparseVGInferenceMethod::get_negative_log_marginal_likelihood()
{
    if (parameter_hash_changed())
        update();

    Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);

    //F012 =-(model.n-model.m)*model.Likelihood.logtheta-0.5*model.n*log(2*pi)-(0.5/sigma2)*(model.yy)-sum(log(diag(La)));
    float64_t neg_f012=0.5*(m_ktru.num_cols-m_ktru.num_rows)*CMath::log(m_sigma2)+
        0.5*m_ktru.num_cols*CMath::log(2.0*CMath::PI)+
        0.5*m_yy/(m_sigma2)-eigen_inv_La.diagonal().array().log().sum();

    //F3 = (0.5/sigma2)*(yKnmInvLmInvLa*yKnmInvLmInvLa');
    float64_t neg_f3=-m_f3;

    //model.TrKnn = sum(model.diagKnn);
    //TrK = - (0.5/sigma2)*(model.TrKnn - sum(diag(C)) );
    float64_t neg_trk=-m_trk;

    //F = F012 + F3 + TrK;
    //F = - F;
    return neg_f012+neg_f3+neg_trk;
}
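
/* How the pieces fit together (a reading of the code in the MATLAB notation
 * of the comments, not an authoritative derivation): with A = sigma2*I + C
 * and La = chol(A),
 *
 *   -F = (n-m)*log(sigma) + 0.5*n*log(2*pi) + 0.5*yy/sigma2
 *        + sum(log(diag(La))) - F3 - TrK
 *
 * Since invLa = inv(La) is triangular, sum(log(diag(La))) equals
 * -sum(log(diag(invLa))), which is why the cached m_inv_La suffices here. */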

void CSparseVGInferenceMethod::update_chol()
{
    // get the sigma variable from the Gaussian likelihood model
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);
    m_sigma2=sigma*sigma;

    //m-by-m matrix
    Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
    //m-by-n matrix
    Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
    Map<VectorXd> eigen_ktrtr_diag(m_ktrtr_diag.vector, m_ktrtr_diag.vlen);

    //Lm = chol(model.Kmm + model.jitter*eye(model.m))
    LLT<MatrixXd> Luu(eigen_kuu*CMath::exp(m_log_scale*2.0)+CMath::exp(m_log_ind_noise)*MatrixXd::Identity(
        m_kuu.num_rows, m_kuu.num_cols));
    m_inv_Lm=SGMatrix<float64_t>(Luu.rows(), Luu.cols());
    Map<MatrixXd> eigen_inv_Lm(m_inv_Lm.matrix, m_inv_Lm.num_rows, m_inv_Lm.num_cols);
    //invLm = Lm\eye(model.m);
    eigen_inv_Lm=Luu.matrixU().solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));

    m_Knm_inv_Lm=SGMatrix<float64_t>(m_ktru.num_cols, m_ktru.num_rows);
    Map<MatrixXd> eigen_Knm_inv_Lm(m_Knm_inv_Lm.matrix, m_Knm_inv_Lm.num_rows, m_Knm_inv_Lm.num_cols);
    //KnmInvLm = model.Knm*invLm;
    eigen_Knm_inv_Lm=(eigen_ktru.transpose()*CMath::exp(m_log_scale*2.0))*eigen_inv_Lm;

    //C = KnmInvLm'*KnmInvLm;
    MatrixXd eigen_C=eigen_Knm_inv_Lm.transpose()*eigen_Knm_inv_Lm;

    m_inv_La=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
    Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);
    //A = sigma2*eye(model.m) + C;
    LLT<MatrixXd> chol_A(m_sigma2*MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols)+eigen_C);

    //La = chol(A);
    //invLa = La\eye(model.m);
    eigen_inv_La=chol_A.matrixU().solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));

    //L=-invLm*invLm' + sigma2*(invLm*invLa*invLa'*invLm');
    m_L=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    eigen_L=eigen_inv_Lm*(
        m_sigma2*eigen_inv_La*eigen_inv_La.transpose()-MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols)
        )*eigen_inv_Lm.transpose();

    //TrK = - (0.5/sigma2)*(model.TrKnn - sum(diag(C)) );
    m_trk=-0.5/(m_sigma2)*(eigen_ktrtr_diag.array().sum()*CMath::exp(m_log_scale*2.0)
        -eigen_C.diagonal().array().sum());
}
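
/* Summary of what update_chol() caches (restating the code, with Kmm/Knm
 * denoting the scaled kernel matrices):
 *   m_inv_Lm     = inv(chol(Kmm + jitter*I))          upper triangular
 *   m_Knm_inv_Lm = Knm*invLm
 *   C            = KnmInvLm'*KnmInvLm                 m-by-m, local
 *   m_inv_La     = inv(chol(sigma2*I + C))
 *   m_L          = invLm*(sigma2*invLa*invLa' - I)*invLm'
 *   m_trk        = -(0.5/sigma2)*(trace(Knn) - trace(C))
 */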

void CSparseVGInferenceMethod::update_alpha()
{
    Map<MatrixXd> eigen_inv_Lm(m_inv_Lm.matrix, m_inv_Lm.num_rows, m_inv_Lm.num_cols);
    Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);
    Map<MatrixXd> eigen_Knm_inv_Lm(m_Knm_inv_Lm.matrix, m_Knm_inv_Lm.num_rows, m_Knm_inv_Lm.num_cols);

    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);

    //yKnmInvLm = (model.y'*KnmInvLm);
    //yKnmInvLmInvLa = yKnmInvLm*invLa;
    VectorXd y_cor=eigen_y-eigen_m;
    VectorXd eigen_y_Knm_inv_Lm_inv_La_transpose=eigen_inv_La.transpose()*(
        eigen_Knm_inv_Lm.transpose()*y_cor);
    //alpha = invLm*invLa*yKnmInvLmInvLa';
    m_alpha=SGVector<float64_t>(m_kuu.num_rows);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    eigen_alpha=eigen_inv_Lm*eigen_inv_La*eigen_y_Knm_inv_Lm_inv_La_transpose;

    m_yy=y_cor.dot(y_cor);
    //F3 = (0.5/sigma2)*(yKnmInvLmInvLa*yKnmInvLmInvLa');
    m_f3=0.5*eigen_y_Knm_inv_Lm_inv_La_transpose.dot(eigen_y_Knm_inv_Lm_inv_La_transpose)/m_sigma2;
}
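
/* A useful identity for reading update_alpha() (consistent with
 * varsgpPredict.m, though not stated in the Shogun docs): the two triangular
 * solves amount to
 *
 *   alpha = (sigma2*Kmm + Kmn*Knm)^-1 * Kmn * (y - mean)
 *
 * with Kmm including the jitter term, so the predictive mean at test inputs
 * X* is simply K*m*alpha + mean(X*). */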

void CSparseVGInferenceMethod::update_deriv()
{
    Map<MatrixXd> eigen_inv_Lm(m_inv_Lm.matrix, m_inv_Lm.num_rows, m_inv_Lm.num_cols);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<MatrixXd> eigen_Knm_inv_Lm(m_Knm_inv_Lm.matrix, m_Knm_inv_Lm.num_rows, m_Knm_inv_Lm.num_cols);
    //m-by-n matrix
    Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);
    //C = KnmInvLm'*KnmInvLm (rebuilt from the cached m_Knm_inv_Lm)
    MatrixXd eigen_C=eigen_Knm_inv_Lm.transpose()*eigen_Knm_inv_Lm;

    m_Tmm=SGMatrix<float64_t>(m_kuu.num_rows, m_kuu.num_cols);
    Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);

    m_Tnm=SGMatrix<float64_t>(m_ktru.num_cols, m_ktru.num_rows);
    Map<MatrixXd> eigen_Tnm(m_Tnm.matrix, m_Tnm.num_rows, m_Tnm.num_cols);

    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);
    m_sigma2=sigma*sigma;

    //invLmInvLa = invLm*invLa;
    //invA = invLmInvLa*invLmInvLa';
    //yKnmInvA = yKnmInvLmInvLa*invLmInvLa';
    //invKmm = invLm*invLm';
    //Tmm = sigma2*invA + yKnmInvA'*yKnmInvA;
    //Tmm = invKmm - Tmm;
    MatrixXd Tmm=-eigen_L-eigen_alpha*eigen_alpha.transpose();

    //Tnm = model.Knm*Tmm;
    eigen_Tnm=(eigen_ktru.transpose()*CMath::exp(m_log_scale*2.0))*Tmm;

    //Tmm = Tmm - (invLm*(C*invLm'))/sigma2;
    eigen_Tmm = Tmm - (eigen_inv_Lm*eigen_C*eigen_inv_Lm.transpose()/m_sigma2);

    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);

    //Tnm = Tnm + (model.y*yKnmInvA);
    eigen_Tnm += (eigen_y-eigen_m)*eigen_alpha.transpose();
}
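
/* m_Tmm and m_Tnm collect everything the gradient code needs: for a kernel
 * hyperparameter h, the derivative of the negative bound is assembled from
 * dKmm/dh against m_Tmm, dKnm/dh against m_Tnm and the diagonal of dKnn/dh
 * (see get_derivative_related_cov() below). This mirrors kernelSparseGradHyp
 * in the reference MATLAB implementation. */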

SGVector<float64_t> CSparseVGInferenceMethod::get_posterior_mean()
{
    SG_NOTIMPLEMENTED
    //TODO: implement this method once I get time
    return SGVector<float64_t>();
}

SGMatrix<float64_t> CSparseVGInferenceMethod::get_posterior_covariance()
{
    SG_NOTIMPLEMENTED
    //TODO: implement this method once I get time
    return SGMatrix<float64_t>();
}

SGVector<float64_t> CSparseVGInferenceMethod::get_derivative_wrt_likelihood_model(
    const TParameter* param)
{
    REQUIRE(!strcmp(param->m_name, "log_sigma"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        m_model->get_name(), param->m_name)

    SGVector<float64_t> dlik(1);

    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<MatrixXd> eigen_kuu(m_kuu.matrix, m_kuu.num_rows, m_kuu.num_cols);
    Map<MatrixXd> eigen_inv_La(m_inv_La.matrix, m_inv_La.num_rows, m_inv_La.num_cols);

    //yKnmInvLmInvLainvLa = yKnmInvLmInvLa*invLa';
    //sigma2aux = sigma2*sum(sum(invLa.*invLa)) + yKnmInvLmInvLainvLa*yKnmInvLmInvLainvLa';
    float64_t sigma2aux=m_sigma2*eigen_inv_La.cwiseProduct(eigen_inv_La).array().sum()+
        eigen_alpha.transpose()*(eigen_kuu*CMath::exp(m_log_scale*2.0)
        +CMath::exp(m_log_ind_noise)*MatrixXd::Identity(
        m_kuu.num_rows, m_kuu.num_cols))*eigen_alpha;
    //Dlik_neg = - (model.n-model.m) + model.yy/sigma2 - 2*F3 - sigma2aux - 2*TrK;

    dlik[0]=(m_ktru.num_cols-m_ktru.num_rows)-m_yy/m_sigma2+2.0*m_f3+sigma2aux+2.0*m_trk;
    return dlik;
}
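
/* Note: the REQUIRE above pins the parameter to "log_sigma", so dlik[0] is
 * the derivative of the negative bound with respect to log(sigma), matching
 * the logtheta parameterisation of the quoted MATLAB code. */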

SGVector<float64_t> CSparseVGInferenceMethod::get_derivative_wrt_inducing_features(
    const TParameter* param)
{
    //[DXu DXunm] = kernelSparseGradInd(model, Tmm, Tnm);
    //DXu_neg = DXu + DXunm/model.sigma2;

    Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);
    Map<MatrixXd> eigen_Tnm(m_Tnm.matrix, m_Tnm.num_rows, m_Tnm.num_cols);

    int32_t dim=m_inducing_features.num_rows;
    int32_t num_samples=m_inducing_features.num_cols;
    SGVector<float64_t> deriv_lat(dim*num_samples);
    deriv_lat.zero();

    m_lock->lock();
    CFeatures *inducing_features=get_inducing_features();
    //asymmetric part (related to xu and x)
    m_kernel->init(inducing_features, m_features);
    for(int32_t lat_idx=0; lat_idx<eigen_Tnm.cols(); lat_idx++)
    {
        Map<VectorXd> deriv_lat_col_vec(deriv_lat.vector+lat_idx*dim,dim);
        //p by n
        SGMatrix<float64_t> deriv_mat=m_kernel->get_parameter_gradient(param, lat_idx);
        Map<MatrixXd> eigen_deriv_mat(deriv_mat.matrix, deriv_mat.num_rows, deriv_mat.num_cols);
        //DXunm/model.sigma2;
        deriv_lat_col_vec+=eigen_deriv_mat*(-CMath::exp(m_log_scale*2.0)/m_sigma2*eigen_Tnm.col(lat_idx));
    }

    //symmetric part (related to xu and xu)
    m_kernel->init(inducing_features, inducing_features);
    for(int32_t lat_lidx=0; lat_lidx<eigen_Tmm.cols(); lat_lidx++)
    {
        Map<VectorXd> deriv_lat_col_vec(deriv_lat.vector+lat_lidx*dim,dim);
        //p by n
        SGMatrix<float64_t> deriv_mat=m_kernel->get_parameter_gradient(param, lat_lidx);
        Map<MatrixXd> eigen_deriv_mat(deriv_mat.matrix, deriv_mat.num_rows, deriv_mat.num_cols);
        //DXu
        deriv_lat_col_vec+=eigen_deriv_mat*(-CMath::exp(m_log_scale*2.0)*eigen_Tmm.col(lat_lidx));
    }
    SG_UNREF(inducing_features);
    m_lock->unlock();
    return deriv_lat;
}
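
/* Layout note (a reading of the code): deriv_lat packs one dim-sized slice
 * per inducing point, matching the column-major layout of
 * m_inducing_features. Each inducing point appears both in Knm (first loop)
 * and in Kmm (second loop), so both loops accumulate into the same slice. */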

SGVector<float64_t> CSparseVGInferenceMethod::get_derivative_wrt_inducing_noise(
    const TParameter* param)
{
    REQUIRE(param, "Param not set\n");
    REQUIRE(!strcmp(param->m_name, "log_inducing_noise"), "Can't compute derivative of "
        "the negative log marginal likelihood wrt %s.%s parameter\n",
        get_name(), param->m_name)

    Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);
    SGVector<float64_t> result(1);
    result[0]=-0.5*CMath::exp(m_log_ind_noise)*eigen_Tmm.diagonal().array().sum();
    return result;
}

float64_t CSparseVGInferenceMethod::get_derivative_related_cov(SGVector<float64_t> ddiagKi,
    SGMatrix<float64_t> dKuui, SGMatrix<float64_t> dKui)
{
    Map<VectorXd> eigen_ddiagKi(ddiagKi.vector, ddiagKi.vlen);
    Map<MatrixXd> eigen_dKuui(dKuui.matrix, dKuui.num_rows, dKuui.num_cols);
    Map<MatrixXd> eigen_dKui(dKui.matrix, dKui.num_rows, dKui.num_cols);

    Map<MatrixXd> eigen_Tmm(m_Tmm.matrix, m_Tmm.num_rows, m_Tmm.num_cols);
    Map<MatrixXd> eigen_Tnm(m_Tnm.matrix, m_Tnm.num_rows, m_Tnm.num_cols);

    //[Dkern Dkernnm DTrKnn] = kernelSparseGradHyp(model, Tmm, Tnm);
    //Dkern_neg = 0.5*Dkern + Dkernnm/model.sigma2 - (0.5/model.sigma2)*DTrKnn;
    float64_t dkern= -0.5*eigen_dKuui.cwiseProduct(eigen_Tmm).sum()
        -eigen_dKui.cwiseProduct(eigen_Tnm.transpose()).sum()/m_sigma2
        +0.5*eigen_ddiagKi.array().sum()/m_sigma2;
    return dkern;
}

SGVector<float64_t> CSparseVGInferenceMethod::get_derivative_wrt_mean(
    const TParameter* param)
{
    REQUIRE(param, "Param not set\n");
    SGVector<float64_t> result;
    int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
    result=SGVector<float64_t>(len);

    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    //m-by-n matrix
    Map<MatrixXd> eigen_ktru(m_ktru.matrix, m_ktru.num_rows, m_ktru.num_cols);

    for (index_t i=0; i<result.vlen; i++)
    {
        SGVector<float64_t> dmu=m_mean->get_parameter_derivative(m_features, param, i);
        Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);

        result[i]=eigen_dmu.dot(eigen_ktru.transpose()*CMath::exp(m_log_scale*2.0)
            *eigen_alpha+(eigen_m-eigen_y))/m_sigma2;
    }
    return result;
}
#endif /* HAVE_EIGEN3 */
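
/* A minimal usage sketch (hypothetical data X, Xu, y and variable names;
 * assumes the standard Shogun 4.x GP regression classes rather than anything
 * defined in this file):
 *
 *   CDenseFeatures<float64_t>* feat=new CDenseFeatures<float64_t>(X);
 *   CDenseFeatures<float64_t>* lat=new CDenseFeatures<float64_t>(Xu);
 *   CRegressionLabels* lab=new CRegressionLabels(y);
 *   CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
 *   CZeroMean* mean=new CZeroMean();
 *   CGaussianLikelihood* lik=new CGaussianLikelihood();
 *   CSparseVGInferenceMethod* inf=new CSparseVGInferenceMethod(
 *       kernel, feat, mean, lab, lik, lat);
 *   CGaussianProcessRegression* gpr=new CGaussianProcessRegression(inf);
 *   gpr->train();
 */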