SHOGUN  3.2.1
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
KLApproxDiagonalInferenceMethod.cpp
浏览该文件的文档.
1  /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Wu Lin
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  *
30  * Code adapted from
31  * http://hannes.nickisch.org/code/approxXX.tar.gz
32  * and Gaussian Process Machine Learning Toolbox
33  * http://www.gaussianprocess.org/gpml/code/matlab/doc/
34  * and the reference paper is
35  * Challis, Edward, and David Barber.
36  * "Concave Gaussian variational approximations for inference in large-scale Bayesian linear models."
37  * International conference on Artificial Intelligence and Statistics. 2011.
38  *
39  * This code specifically adapted from function in approxKL.m and infKL.m
40  */
41 
43 
44 #ifdef HAVE_EIGEN3
48 
49 using namespace Eigen;
50 
51 namespace shogun
52 {
53 
54 CKLApproxDiagonalInferenceMethod::CKLApproxDiagonalInferenceMethod() : CKLLowerTriangularInferenceMethod()
55 {
56  init();
57 }
58 
60  CFeatures* feat, CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod)
61  : CKLLowerTriangularInferenceMethod(kern, feat, m, lab, mod)
62 {
63  init();
64 }
65 
66 void CKLApproxDiagonalInferenceMethod::init()
67 {
68  SG_ADD(&m_InvK, "invK",
69  "The K^{-1} matrix",
71 }
72 
74 {
83  update();
84 
85  index_t len=m_mu.vlen;
86  SGVector<float64_t> result(len);
87 
88  Map<VectorXd> eigen_result(result.vector, len);
89  Map<VectorXd> eigen_alpha(m_alpha.vector, len);
90 
91  eigen_result=eigen_alpha;
92  return result;
93 }
94 
96 {
97 }
98 
100 {
101  index_t len=m_mean_vec.vlen;
102  Map<VectorXd> eigen_mean(m_mean_vec.vector, m_mean_vec.vlen);
103  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
104  Map<VectorXd> eigen_alpha(m_alpha.vector, len);
105 
106  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
107  //mu=K*alpha+m
108  eigen_mu=eigen_K*CMath::sq(m_scale)*eigen_alpha+eigen_mean;
109 
110  Map<VectorXd> eigen_log_v(m_alpha.vector+len, m_alpha.vlen-len);
111  Map<VectorXd> eigen_s2(m_s2.vector, m_s2.vlen);
112  //s2=sum(C.*C,2);
113  eigen_s2=eigen_log_v.array().exp();
114 
117 }
118 
120 {
121  REQUIRE(gradient.vlen==m_alpha.vlen,
122  "The length of gradients (%d) should the same as the length of parameters (%d)\n",
123  gradient.vlen, m_alpha.vlen);
124 
125  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
126  Map<MatrixXd> eigen_InvK(m_InvK.matrix, m_InvK.num_rows, m_InvK.num_cols);
127 
128  index_t len=m_mu.vlen;
129  Map<VectorXd> eigen_alpha(m_alpha.vector, len);
130  Map<VectorXd> eigen_s2(m_s2.vector, m_s2.vlen);
131 
133  //[a,df,dV] = a_related2(mu,s2,y,lik);
134  TParameter* s2_param=lik->m_parameters->get_parameter("sigma2");
136  Map<VectorXd> eigen_dv(dv.vector, dv.vlen);
137 
138  TParameter* mu_param=lik->m_parameters->get_parameter("mu");
140  Map<VectorXd> eigen_df(df.vector, df.vlen);
141 
142  Map<VectorXd> eigen_dnlz_alpha(gradient.vector, len);
143  //dnlZ_alpha = -K*(df-alpha);
144  eigen_dnlz_alpha=eigen_K*CMath::sq(m_scale)*(-eigen_df+eigen_alpha);
145 
146  Map<VectorXd> eigen_dnlz_log_v(gradient.vector+len, gradient.vlen-len);
147 
148  eigen_dnlz_log_v=(eigen_InvK.diagonal().array()-(1.0/eigen_s2.array()));
149  eigen_dnlz_log_v=(0.5*eigen_dnlz_log_v.array())-eigen_dv.array();
150  eigen_dnlz_log_v=eigen_dnlz_log_v.array()*eigen_s2.array();
151 
152 }
153 
155 {
156  Map<VectorXd> eigen_alpha(m_alpha.vector, m_mu.vlen);
157  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
158  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
159  //get mean vector and create eigen representation of it
160  Map<VectorXd> eigen_mean(m_mean_vec.vector, m_mean_vec.vlen);
161  Map<VectorXd> eigen_log_v(m_alpha.vector+m_mu.vlen, m_alpha.vlen-m_mu.vlen);
162  Map<VectorXd> eigen_s2(m_s2.vector, m_s2.vlen);
163  Map<MatrixXd> eigen_InvK(m_InvK.matrix, m_InvK.num_rows, m_InvK.num_cols);
164 
167  float64_t log_det=eigen_log_v.array().sum()-m_log_det_Kernel;
168  float64_t trace=(eigen_s2.array()*eigen_InvK.diagonal().array()).sum();
169 
170  //nlZ = -a -logdet(V*inv(K))/2 -n/2 +(alpha'*K*alpha)/2 +trace(V*inv(K))/2;
171  float64_t result=-a+0.5*(-eigen_K.rows()+eigen_alpha.dot(eigen_mu-eigen_mean)+trace-log_det);
172 
173  return result;
174 }
175 
177 {
178  Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
180  Map<MatrixXd> eigen_InvK(m_InvK.matrix, m_InvK.num_rows, m_InvK.num_cols);
181  eigen_InvK=solve_inverse(MatrixXd::Identity(m_ktrtr.num_rows,m_ktrtr.num_cols));
182 
183  float64_t nlml_new=0;
184  float64_t nlml_def=0;
185 
187  index_t total_len=len*2;
188 
189  if (m_alpha.vlen == total_len)
190  {
192 
193  SGVector<float64_t> s2_tmp(m_s2.vlen);
194  Map<VectorXd> eigen_s2(s2_tmp.vector, s2_tmp.vlen);
195  eigen_s2.fill(1.0);
199  float64_t trace=eigen_InvK.diagonal().array().sum();
200  nlml_def=-a+0.5*(-eigen_K.rows()+trace+m_log_det_Kernel);
201 
202  if (nlml_new<=nlml_def)
204  }
205 
206  if (m_alpha.vlen != total_len || nlml_def<nlml_new)
207  {
208  if(m_alpha.vlen != total_len)
209  m_alpha = SGVector<float64_t>(total_len);
210  m_alpha.zero();
211 
214  }
215 
216  nlml_new=lbfgs_optimization();
217 }
218 
220 {
222  Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);
223  Map<VectorXd> eigen_s2(m_s2.vector, m_s2.vlen);
224  eigen_Sigma=eigen_s2.asDiagonal();
225 }
226 
228 {
230  Map<MatrixXd> eigen_InvK_Sigma(m_InvK_Sigma.matrix, m_InvK_Sigma.num_rows, m_InvK_Sigma.num_cols);
231  Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);
232  eigen_InvK_Sigma=solve_inverse(eigen_Sigma);
233 }
234 
235 } /* namespace shogun */
236 
237 #endif /* HAVE_EIGEN3 */
SGVector< float64_t > m_alpha
virtual SGVector< float64_t > get_variational_first_derivative(const TParameter *param) const =0
int32_t index_t
Definition: common.h:62
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual int32_t get_num_labels() const =0
static T sum(T *vec, int32_t len)
return sum(vec)
Definition: SGVector.h:507
The variational Gaussian Likelihood base class. The variational distribution is Gaussian.
static T sq(T x)
x^2
Definition: Math.h:324
TParameter * get_parameter(int32_t idx)
Definition: Parameter.h:286
parameter struct
Definition: Parameter.h:32
#define REQUIRE(x,...)
Definition: SGIO.h:207
Parameter * m_parameters
Definition: SGObject.h:470
An abstract class of the mean function.
Definition: MeanFunction.h:28
virtual void set_variational_distribution(SGVector< float64_t > mu, SGVector< float64_t > s2, const CLabels *lab)
virtual void get_gradient_of_nlml_wrt_parameters(SGVector< float64_t > gradient)
SGMatrix< float64_t > m_Sigma
The KL approximation inference method class.
virtual float64_t lbfgs_optimization()
double float64_t
Definition: common.h:50
index_t num_rows
Definition: SGMatrix.h:298
virtual SGVector< float64_t > get_variational_expection()=0
index_t num_cols
Definition: SGMatrix.h:300
The class Features is the base class of all feature objects.
Definition: Features.h:68
SGVector< float64_t > m_mu
SGVector< float64_t > m_s2
The Kernel base class.
Definition: Kernel.h:153
virtual CVariationalGaussianLikelihood * get_variational_likelihood() const
#define SG_ADD(...)
Definition: SGObject.h:67
virtual bool parameter_hash_changed()
Definition: SGObject.cpp:209
The Likelihood model base class.
SGMatrix< float64_t > m_ktrtr
index_t vlen
Definition: SGVector.h:707

SHOGUN 机器学习工具包 - 项目文档