SHOGUN  3.2.1
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
DeepAutoencoder.cpp
浏览该文件的文档.
1 /*
2  * Copyright (c) 2014, Shogun Toolbox Foundation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7 
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18 
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  * Written (W) 2014 Khaled Nasr
32  */
33 
37 
41 
42 #include <string>
43 
44 using namespace shogun;
45 
47 {
48  init();
49 }
50 
53 {
54  set_layers(layers);
55  init();
56  m_sigma = sigma;
57  quick_connect();
59 }
60 
62 {
64 
65  for (int32_t i=0; i<m_num_layers; i++)
66  {
67  REQUIRE(get_layer(i)->get_num_neurons()==get_layer(m_num_layers-i-1)->get_num_neurons(),
68  "Layer %i (%i neurons) must have the same number of neurons "
69  "as layer %i (%i neurons)\n", i, get_layer(i)->get_num_neurons(),
70  m_num_layers-i-1, get_layer(m_num_layers-i-1)->get_num_neurons());
71  }
72 }
73 
75 {
76  SGMatrix<float64_t> data_matrix = features_to_matrix(data);
77 
78  int32_t num_encoding_layers = (m_num_layers-1)/2;
79  for (int32_t i=1; i<=num_encoding_layers; i++)
80  {
81  SG_INFO("Pre-training Layer %i\n", i);
82 
83  CNeuralLayer* ae_encoding_layer = NULL;
84 
85  if (strcmp(get_layer(i)->get_name(), "NeuralLinearLayer")==0)
86  ae_encoding_layer = new CNeuralLinearLayer(get_layer(i)->get_num_neurons());
87  else if (strcmp(get_layer(i)->get_name(), "NeuralLogisticLayer")==0)
88  ae_encoding_layer = new CNeuralLogisticLayer(get_layer(i)->get_num_neurons());
89  else if (strcmp(get_layer(i)->get_name(), "NeuralRectifiedLinearLayer")==0)
90  ae_encoding_layer = new CNeuralRectifiedLinearLayer(get_layer(i)->get_num_neurons());
91  else
92  SG_ERROR("Unsupported layer type (%s) for layer %i\n",
93  get_layer(i)->get_name(), i);
94 
95  CNeuralLayer* ae_decoding_layer = NULL;
96  int32_t k = m_num_layers-i;
97 
98  if (strcmp(get_layer(k)->get_name(), "NeuralLinearLayer")==0)
99  ae_decoding_layer = new CNeuralLinearLayer(get_layer(k)->get_num_neurons());
100  else if (strcmp(get_layer(k)->get_name(), "NeuralLogisticLayer")==0)
101  ae_decoding_layer = new CNeuralLogisticLayer(get_layer(k)->get_num_neurons());
102  else if (strcmp(get_layer(k)->get_name(), "NeuralRectifiedLinearLayer")==0)
103  ae_decoding_layer = new CNeuralRectifiedLinearLayer(get_layer(k)->get_num_neurons());
104  else
105  SG_ERROR("Unsupported layer type (%s) for layer %i\n",
106  get_layer(k)->get_name(), k);
107 
108  CAutoencoder ae(get_layer(i-1)->get_num_neurons(),
109  ae_encoding_layer, ae_decoding_layer, m_sigma);
110 
117  ae.epsilon = pt_epsilon[i-1];
121  ae.gd_momentum = pt_gd_momentum[i-1];
124 
125  // forward propagate the data to obtain the training data for the
126  // current autoencoder
127  for (int32_t j=0; j<i; j++)
128  get_layer(j)->set_batch_size(data_matrix.num_cols);
129  SGMatrix<float64_t> ae_input_matrix = forward_propagate(data_matrix, i-1);
130  CDenseFeatures<float64_t> ae_input_features(ae_input_matrix);
131  for (int32_t j=0; j<i-1; j++)
132  get_layer(j)->set_batch_size(1);
133 
134  ae.train(&ae_input_features);
135 
136  SGVector<float64_t> ae_params = ae.get_parameters();
137  SGVector<float64_t> encoding_layer_params = get_section(m_params, i);
138  SGVector<float64_t> decoding_layer_params = get_section(m_params, m_num_layers-i);
139 
140  for (int32_t j=0; j<ae_params.vlen;j++)
141  {
142  if (j<encoding_layer_params.vlen)
143  encoding_layer_params[j] = ae_params[j];
144  else
145  decoding_layer_params[j-encoding_layer_params.vlen] = ae_params[j];
146  }
147 
148  }
149 
150  set_batch_size(1);
151 }
152 
155 {
156  SGMatrix<float64_t> transformed = forward_propagate(data, (m_num_layers-1)/2);
157  return new CDenseFeatures<float64_t>(transformed);
158 }
159 
162 {
163  SGMatrix<float64_t> reconstructed = forward_propagate(data);
164  return new CDenseFeatures<float64_t>(reconstructed);
165 }
166 
168  CNeuralLayer* output_layer, float64_t sigma)
169 {
171  for (int32_t i=0; i<=(m_num_layers-1)/2; i++)
172  layers->append_element(get_layer(i));
173 
174  if (output_layer != NULL)
175  layers->append_element(output_layer);
176 
177  CNeuralNetwork* net = new CNeuralNetwork(layers);
178  net->quick_connect();
179  net->initialize(sigma);
180 
181  SGVector<float64_t> net_params = net->get_parameters();
182 
183  int32_t len = m_index_offsets[(m_num_layers-1)/2]
185 
186  for (int32_t i=0; i<len; i++)
187  net_params[i] = m_params[i];
188 
189  return net;
190 }
191 
193 {
194  float64_t error = CNeuralNetwork::compute_error(targets);
195 
196  if (m_contraction_coefficient != 0.0)
197 
198  for (int32_t i=1; i<=(m_num_layers-1)/2; i++)
199  error +=
200  get_layer(i)->compute_contraction_term(get_section(m_params,i));
201 
202  return error;
203 }
204 
206 {
208  for (int32_t i=1; i<=(m_num_layers-1)/2; i++)
210 }
211 
212 
213 template <class T>
214 SGVector<T> CDeepAutoencoder::get_section(SGVector<T> v, int32_t i)
215 {
216  return SGVector<T>(v.vector+m_index_offsets[i],
217  get_layer(i)->get_num_parameters(), false);
218 }
219 
220 void CDeepAutoencoder::init()
221 {
222  m_sigma = 0.01;
223 
226 
229 
232 
235 
238 
241 
243  pt_epsilon.set_const(1e-5);
244 
247 
250 
253 
256 
259 
262 
263  SG_ADD(&pt_noise_type, "pt_noise_type",
264  "Pre-training Noise Type", MS_NOT_AVAILABLE);
265  SG_ADD(&pt_noise_parameter, "pt_noise_parameter",
266  "Pre-training Noise Parameter", MS_NOT_AVAILABLE);
267  SG_ADD(&pt_contraction_coefficient, "pt_contraction_coefficient",
268  "Pre-training Contraction Coefficient", MS_NOT_AVAILABLE);
269  SG_ADD(&pt_optimization_method, "pt_optimization_method",
270  "Pre-training Optimization Method", MS_NOT_AVAILABLE);
271  SG_ADD(&pt_gd_mini_batch_size, "pt_gd_mini_batch_size",
272  "Pre-training Gradient Descent Mini-batch size", MS_NOT_AVAILABLE);
273  SG_ADD(&pt_max_num_epochs, "pt_max_num_epochs",
274  "Pre-training Max number of Epochs", MS_NOT_AVAILABLE);
275  SG_ADD(&pt_gd_learning_rate, "pt_gd_learning_rate",
276  "Pre-training Gradient descent learning rate", MS_NOT_AVAILABLE);
277  SG_ADD(&pt_gd_learning_rate_decay, "pt_gd_learning_rate_decay",
278  "Pre-training Gradient descent learning rate decay", MS_NOT_AVAILABLE);
279  SG_ADD(&pt_gd_momentum, "pt_gd_momentum",
280  "Pre-training Gradient Descent Momentum", MS_NOT_AVAILABLE);
281  SG_ADD(&pt_gd_error_damping_coeff, "pt_gd_error_damping_coeff",
282  "Pre-training Gradient Descent Error Damping Coeff", MS_NOT_AVAILABLE);
283  SG_ADD(&pt_epsilon, "pt_epsilon",
284  "Pre-training Epsilon", MS_NOT_AVAILABLE);
285  SG_ADD(&pt_l2_coefficient, "pt_l2_coefficient",
286  "Pre-training L2 regularization coeff", MS_NOT_AVAILABLE);
287  SG_ADD(&pt_l1_coefficient, "pt_l1_coefficient",
288  "Pre-training L1 regularization coeff", MS_NOT_AVAILABLE);
289 
290  SG_ADD(&m_sigma, "m_sigma", "Initialization Sigma", MS_NOT_AVAILABLE);
291 }
#define SG_INFO(...)
Definition: SGIO.h:119
SGVector< int32_t > m_index_offsets
virtual void set_contraction_coefficient(float64_t coeff)
virtual int32_t get_num_parameters()
Definition: NeuralLayer.h:243
SGVector< int32_t > pt_max_num_epochs
SGVector< float64_t > get_parameters()
Represents a single layer neural autoencoder.
Definition: Autoencoder.h:79
virtual CDenseFeatures< float64_t > * transform(CDenseFeatures< float64_t > *data)
SGVector< float64_t > pt_contraction_coefficient
EAENoiseType noise_type
Definition: Autoencoder.h:173
SGVector< float64_t > m_params
A generic multi-layer neural network.
#define SG_ERROR(...)
Definition: SGIO.h:130
#define REQUIRE(x,...)
Definition: SGIO.h:207
virtual void set_contraction_coefficient(float64_t coeff)
Definition: Autoencoder.h:136
SGMatrix< float64_t > features_to_matrix(CFeatures *features)
Base class for neural network layers.
Definition: NeuralLayer.h:73
virtual void quick_connect()
SGVector< float64_t > pt_gd_error_damping_coeff
virtual float64_t compute_error(SGMatrix< float64_t > inputs, SGMatrix< float64_t > targets)
float64_t gd_learning_rate_decay
ENNOptimizationMethod optimization_method
float64_t m_contraction_coefficient
Definition: Autoencoder.h:188
SGVector< float64_t > pt_epsilon
EAENoiseType
Determines the noise type for denoising autoencoders.
Definition: Autoencoder.h:46
virtual CDenseFeatures< float64_t > * reconstruct(CDenseFeatures< float64_t > *data)
virtual float64_t compute_contraction_term(SGVector< float64_t > parameters)
Definition: NeuralLayer.h:228
float64_t gd_error_damping_coeff
virtual void pre_train(CFeatures *data)
virtual bool train(CFeatures *data)
Definition: Autoencoder.cpp:67
virtual void set_batch_size(int32_t batch_size)
double float64_t
Definition: common.h:50
SGVector< float64_t > pt_l1_coefficient
Dynamic array class for CSGObject pointers that creates an array that can be used like a list or an array.
index_t num_cols
Definition: SGMatrix.h:300
void set_const(T const_elem)
Definition: SGVector.cpp:124
CNeuralLayer * get_layer(int32_t i)
Neural layer with linear neurons, with an identity activation function. can be used as a hidden layer...
Neural layer with linear neurons, with a logistic activation function. can be used as a hidden layer ...
virtual void set_layers(CDynamicObjectArray *layers)
virtual float64_t compute_error(SGMatrix< float64_t > targets)
virtual void set_batch_size(int32_t batch_size)
Definition: NeuralLayer.cpp:73
virtual const char * get_name() const
The class Features is the base class of all feature objects.
Definition: Features.h:68
virtual CNeuralNetwork * convert_to_neural_network(CNeuralLayer *output_layer=NULL, float64_t sigma=0.01)
float64_t noise_parameter
Definition: Autoencoder.h:176
virtual SGMatrix< float64_t > forward_propagate(CFeatures *data, int32_t j=-1)
SGVector< int32_t > pt_gd_mini_batch_size
virtual void initialize(float64_t sigma=0.01f)
SGVector< float64_t > pt_noise_parameter
virtual void set_layers(CDynamicObjectArray *layers)
SGVector< int32_t > pt_noise_type
#define SG_ADD(...)
Definition: SGObject.h:67
Neural layer with rectified linear neurons.
SGVector< float64_t > pt_l2_coefficient
SGVector< int32_t > pt_optimization_method
SGVector< float64_t > pt_gd_momentum
ENNOptimizationMethod
Definition: NeuralNetwork.h:49
SGVector< float64_t > pt_gd_learning_rate
index_t vlen
Definition: SGVector.h:707
bool append_element(CSGObject *e)
float64_t contraction_coefficient
Definition: NeuralLayer.h:294
SGVector< float64_t > pt_gd_learning_rate_decay

SHOGUN 机器学习工具包 - 项目文档