MLPACK  1.0.10
cosine_tree.hpp
Go to the documentation of this file.
1 
23 #ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_HPP
24 #define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_HPP
25 
26 #include <mlpack/core.hpp>
27 #include <boost/heap/priority_queue.hpp>
28 
29 namespace mlpack {
30 namespace tree {
31 
// Both classes are only named here; their bodies appear further down in this
// header. Forward declarations are enough because the queue below stores
// CosineTree pointers and takes CompareCosineNode as a template argument.
32 // Predeclare classes for CosineNodeQueue typedef.
33 class CompareCosineNode;
34 class CosineTree;
35 
// Priority queue of CosineTree pointers whose ordering is delegated to
// CompareCosineNode (which compares the nodes' L2Error() values). It is the
// treeQueue argument type of ModifiedGramSchmidt(), MonteCarloError() and
// ConstructBasis().
36 // CosineNodeQueue typedef.
37 typedef boost::heap::priority_queue<CosineTree*,
38  boost::heap::compare<CompareCosineNode> > CosineNodeQueue;
39 
41 {
42  public:
43 
52  CosineTree(const arma::mat& dataset);
53 
63  CosineTree(CosineTree& parentNode, const std::vector<size_t>& subIndices);
64 
79  CosineTree(const arma::mat& dataset,
80  const double epsilon,
81  const double delta);
82 
92  void ModifiedGramSchmidt(CosineNodeQueue& treeQueue,
93  arma::vec& centroid,
94  arma::vec& newBasisVector,
95  arma::vec* addBasisVector = NULL);
96 
109  double MonteCarloError(CosineTree* node,
110  CosineNodeQueue& treeQueue,
111  arma::vec* addBasisVector1 = NULL,
112  arma::vec* addBasisVector2 = NULL);
113 
119  void ConstructBasis(CosineNodeQueue& treeQueue);
120 
126  void CosineNodeSplit();
127 
134  void ColumnSamplesLS(std::vector<size_t>& sampledIndices,
135  arma::vec& probabilities, size_t numSamples);
136 
143  size_t ColumnSampleLS();
144 
157  size_t BinarySearch(arma::vec& cDistribution, double value, size_t start,
158  size_t end);
159 
167  void CalculateCosines(arma::vec& cosines);
168 
173  void CalculateCentroid();
174 
176  void GetFinalBasis(arma::mat& finalBasis) { finalBasis = basis; }
177 
179  const arma::mat& GetDataset() const { return dataset; }
180 
182  std::vector<size_t>& VectorIndices() { return indices; }
183 
185  void L2Error(const double error) { this->l2Error = error; }
186 
188  double L2Error() const { return l2Error; }
189 
191  arma::vec& Centroid() { return centroid; }
192 
194  void BasisVector(arma::vec& bVector) { this->basisVector = bVector; }
195 
197  arma::vec& BasisVector() { return basisVector; }
198 
200  CosineTree* Left() { return left; }
201 
203  CosineTree* Right() { return right; }
204 
206  size_t NumColumns() const { return numColumns; }
207 
209  double FrobNormSquared() const { return frobNormSquared; }
210 
212  size_t SplitPointIndex() const { return indices[splitPointIndex]; }
213 
214  private:
216  const arma::mat& dataset;
218  double epsilon;
220  double delta;
222  arma::mat basis;
230  std::vector<size_t> indices;
232  arma::vec l2NormsSquared;
234  arma::vec centroid;
236  arma::vec basisVector;
240  size_t numColumns;
242  double l2Error;
245 };
246 
// Comparator handed to boost::heap::compare<> in the CosineNodeQueue typedef.
//
// NOTE(review): the class head (listing line 247) was missing from the
// extracted listing and is restored here from the forward declaration
// `class CompareCosineNode;` -- confirm against the upstream header.
247 class CompareCosineNode
248 {
249  public:
250 
  // Returns true when node a's L2Error() is strictly smaller than node b's.
  // Neither pointer is checked for NULL, so both must point to valid nodes.
251  // Comparison function for construction of priority queue.
252  bool operator() (const CosineTree* a, const CosineTree* b) const
253  {
254  return a->L2Error() < b->L2Error();
255  }
256 };
257 
258 }; // namespace tree
259 }; // namespace mlpack
260 
261 #endif
void CalculateCosines(arma::vec &cosines)
Calculate cosines of the columns present in the node, with respect to the sampled splitting point...
CosineTree * left
Left child of the node.
std::vector< size_t > indices
Indices of columns of input matrix in the node.
size_t numColumns
Number of columns of input matrix in the node.
arma::vec l2NormsSquared
L2-norm squared of columns in the node.
size_t SplitPointIndex() const
Get the column index of split point of the node.
double FrobNormSquared() const
Get the Frobenius norm squared of columns in the node.
void CosineNodeSplit()
This function splits the cosine node into two children based on the cosines of the columns contained ...
double MonteCarloError(CosineTree *node, CosineNodeQueue &treeQueue, arma::vec *addBasisVector1=NULL, arma::vec *addBasisVector2=NULL)
Estimates the squared error of the projection of the input node's matrix onto the current vector subs...
size_t splitPointIndex
Index of split point of cosine node.
void GetFinalBasis(arma::mat &finalBasis)
Returns the basis of the constructed subspace.
size_t ColumnSampleLS()
Sample a point from the Length-Squared distribution of the cosine node.
arma::vec centroid
Centroid of columns of input matrix in the node.
const arma::mat & dataset
Matrix for which cosine tree is constructed.
arma::vec & Centroid()
Get a reference to the centroid vector.
void ConstructBasis(CosineNodeQueue &treeQueue)
Constructs the final basis matrix, after the cosine tree construction.
double epsilon
Error tolerance fraction for calculated subspace.
bool operator()(const CosineTree *a, const CosineTree *b) const
Comparison function for construction of priority queue.
CosineTree * Left()
Get pointer to the left child of the node.
CosineTree(const arma::mat &dataset)
CosineTree constructor for the root node of the tree.
void BasisVector(arma::vec &bVector)
Set the basis vector of the node.
size_t BinarySearch(arma::vec &cDistribution, double value, size_t start, size_t end)
Sample a column based on the cumulative Length-Squared distribution of the cosine node...
arma::vec & BasisVector()
Get the basis vector of the node.
void CalculateCentroid()
Calculate centroid of the columns present in the node.
CosineTree * parent
Parent of the node.
arma::vec basisVector
Orthonormalized basis vector of the node.
CosineTree * right
Right child of the node.
double L2Error() const
Get the Monte Carlo error.
size_t NumColumns() const
Get number of columns of input matrix in the node.
CosineTree * Right()
Get pointer to the right child of the node.
void ModifiedGramSchmidt(CosineNodeQueue &treeQueue, arma::vec &centroid, arma::vec &newBasisVector, arma::vec *addBasisVector=NULL)
Calculates the orthonormalization of the passed centroid, with respect to the current vector subspace...
boost::heap::priority_queue< CosineTree *, boost::heap::compare< CompareCosineNode > > CosineNodeQueue
Definition: cosine_tree.hpp:34
double l2Error
Monte Carlo error for this node.
arma::mat basis
Subspace basis of the input dataset.
double frobNormSquared
Frobenius norm squared of columns in the node.
void ColumnSamplesLS(std::vector< size_t > &sampledIndices, arma::vec &probabilities, size_t numSamples)
Sample 'numSamples' points from the Length-Squared distribution of the cosine node.
void L2Error(const double error)
Set the Monte Carlo error.
const arma::mat & GetDataset() const
Get a reference to the dataset matrix.
std::vector< size_t > & VectorIndices()
Get the indices of columns in the node.
double delta
Cumulative probability for Monte Carlo error lower bound.