Point Cloud Library (PCL)  1.7.1
sparse_matrix.hpp
1 /*
2 Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7 
8 Redistributions of source code must retain the above copyright notice, this list of
9 conditions and the following disclaimer. Redistributions in binary form must reproduce
10 the above copyright notice, this list of conditions and the following disclaimer
11 in the documentation and/or other materials provided with the distribution.
12 
13 Neither the name of the Johns Hopkins University nor the names of its contributors
14 may be used to endorse or promote products derived from this software without specific
15 prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
18 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
20 SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
25 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26 DAMAGE.
27 */
28 
29 #include <float.h>
30 #ifdef _WIN32
31 # ifndef WIN32_LEAN_AND_MEAN
32 # define WIN32_LEAN_AND_MEAN
33 # endif // WIN32_LEAN_AND_MEAN
34 # ifndef NOMINMAX
35 # define NOMINMAX
36 # endif // NOMINMAX
37 # include <windows.h>
38 #endif //_WIN32
39 
40 ///////////////////
41 // SparseMatrix //
42 ///////////////////
43 ///////////////////////////////////////////
44 // Static Allocator Methods and Members //
45 ///////////////////////////////////////////
46 
47 namespace pcl
48 {
49  namespace poisson
50  {
51 
52 
// Static allocator state, shared by every SparseMatrix<T> of a given T.
// When UseAlloc is non-zero, row storage is taken from the pooled allocator
// below instead of per-row malloc/free (see SetRowSize / Resize).
template<class T> int SparseMatrix<T>::UseAlloc=0;
template<class T> Allocator<MatrixEntry<T> > SparseMatrix<T>::internalAllocator;
// Returns non-zero when the pooled allocator is active.
template<class T> int SparseMatrix<T>::UseAllocator(void){return UseAlloc;}
56  template<class T>
57  void SparseMatrix<T>::SetAllocator( int blockSize )
58  {
59  if(blockSize>0){
60  UseAlloc=1;
61  internalAllocator.set(blockSize);
62  }
63  else{UseAlloc=0;}
64  }
65  ///////////////////////////////////////
66 // SparseMatrix Methods and Members //
67  ///////////////////////////////////////
68 
69  template< class T >
71  {
72  _contiguous = false;
73  _maxEntriesPerRow = 0;
74  rows = 0;
75  rowSizes = NULL;
76  m_ppElements = NULL;
77  }
78 
79  template< class T > SparseMatrix< T >::SparseMatrix( int rows ) : SparseMatrix< T >() { Resize( rows ); }
80  template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ) : SparseMatrix< T >() { Resize( rows , maxEntriesPerRow ); }
81 
82  template< class T >
84  {
85  if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
86  else Resize( M.rows );
87  for( int i=0 ; i<rows ; i++ )
88  {
89  SetRowSize( i , M.rowSizes[i] );
90  memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
91  }
92  }
93  template<class T>
94  int SparseMatrix<T>::Entries( void ) const
95  {
96  int e = 0;
97  for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
98  return e;
99  }
100  template<class T>
102  {
103  if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
104  else Resize( M.rows );
105  for( int i=0 ; i<rows ; i++ )
106  {
107  SetRowSize( i , M.rowSizes[i] );
108  memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
109  }
110  return *this;
111  }
112 
// Destructor: Resize(0) releases all row storage and the index arrays.
template<class T>
SparseMatrix<T>::~SparseMatrix( void ){ Resize( 0 ); }
115 
116  template< class T >
117  bool SparseMatrix< T >::write( const char* fileName ) const
118  {
119  FILE* fp = fopen( fileName , "wb" );
120  if( !fp ) return false;
121  bool ret = write( fp );
122  fclose( fp );
123  return ret;
124  }
125  template< class T >
126  bool SparseMatrix< T >::read( const char* fileName )
127  {
128  FILE* fp = fopen( fileName , "rb" );
129  if( !fp ) return false;
130  bool ret = read( fp );
131  fclose( fp );
132  return ret;
133  }
134  template< class T >
135  bool SparseMatrix< T >::write( FILE* fp ) const
136  {
137  if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
138  if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
139  for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
140  return true;
141  }
142  template< class T >
143  bool SparseMatrix< T >::read( FILE* fp )
144  {
145  int r;
146  if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
147  Resize( r );
148  if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
149  for( int i=0 ; i<rows ; i++ )
150  {
151  r = rowSizes[i];
152  rowSizes[i] = 0;
153  SetRowSize( i , r );
154  if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
155  }
156  return true;
157  }
158 
159 
160  template< class T >
162  {
163  if( rows>0 )
164  {
165 
166  if( !UseAlloc )
167  if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
168  else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
169  free( m_ppElements );
170  free( rowSizes );
171  }
172  rows = r;
173  if( r )
174  {
175  rowSizes = ( int* )malloc( sizeof( int ) * r );
176  memset( rowSizes , 0 , sizeof( int ) * r );
177  m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
178  }
179  _contiguous = false;
180  _maxEntriesPerRow = 0;
181  }
/** Re-allocate as an r-row matrix with contiguous storage: one block holding
 *  exactly e entries per row, with row i pointing at offset i*e into it.
 *  Any previous storage is released first. */
template< class T >
void SparseMatrix< T >::Resize( int r , int e )
{
  if( rows>0 )
  {
    // NOTE: only the inner if/else is guarded by !UseAlloc -- the two free()
    // calls below always run; pool-allocated entries are never freed here.
    if( !UseAlloc )
      if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
      else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
    free( m_ppElements );
    free( rowSizes );
  }
  rows = r;
  if( r )
  {
    rowSizes = ( int* )malloc( sizeof( int ) * r );
    memset( rowSizes , 0 , sizeof( int ) * r );
    m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
    // Single contiguous block for all rows; row i starts at m_ppElements[0]+i*e.
    m_ppElements[0] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * r * e );
    for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
  }
  _contiguous = true;
  _maxEntriesPerRow = e;
}
205 
206  template<class T>
207  void SparseMatrix< T >::SetRowSize( int row , int count )
208  {
209  if( _contiguous )
210  {
211  if( count>_maxEntriesPerRow ) fprintf( stderr , "[ERROR] Cannot set row size on contiguous matrix: %d<=%d\n" , count , _maxEntriesPerRow ) , exit( 0 );
212  rowSizes[row] = count;
213  }
214  else if( row>=0 && row<rows )
215  {
216  if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements(count);
217  else
218  {
219  if( rowSizes[row] ) free( m_ppElements[row] );
220  if( count>0 ) m_ppElements[row] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * count );
221  }
222  }
223  }
224 
225 
226  template<class T>
228  {
229  Resize(this->m_N, this->m_M);
230  }
231 
232  template<class T>
234  {
235  SetZero();
236  for(int ij=0; ij < Min( this->Rows(), this->Columns() ); ij++)
237  (*this)(ij,ij) = T(1);
238  }
239 
240  template<class T>
242  {
243  SparseMatrix<T> M(*this);
244  M *= V;
245  return M;
246  }
247 
248  template<class T>
250  {
251  for (int i=0; i<this->Rows(); i++)
252  {
253  for(int ii=0;ii<m_ppElements[i].size();i++){m_ppElements[i][ii].Value*=V;}
254  }
255  return *this;
256  }
257 
258  template<class T>
260  {
261  SparseMatrix<T> R( this->Rows(), M.Columns() );
262  for(int i=0; i<R.Rows(); i++){
263  for(int ii=0;ii<m_ppElements[i].size();ii++){
264  int N=m_ppElements[i][ii].N;
265  T Value=m_ppElements[i][ii].Value;
266  for(int jj=0;jj<M.m_ppElements[N].size();jj++){
267  R(i,M.m_ppElements[N][jj].N) += Value * M.m_ppElements[N][jj].Value;
268  }
269  }
270  }
271  return R;
272  }
273 
274  template<class T>
275  template<class T2>
277  {
278  Vector<T2> R( rows );
279 
280  for (int i=0; i<rows; i++)
281  {
282  T2 temp=T2();
283  for(int ii=0;ii<rowSizes[i];ii++){
284  temp+=m_ppElements[i][ii].Value * V.m_pV[m_ppElements[i][ii].N];
285  }
286  R(i)=temp;
287  }
288  return R;
289  }
290 
291  template<class T>
292  template<class T2>
293  void SparseMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , int threads ) const
294  {
295 #pragma omp parallel for num_threads( threads ) schedule( static )
296  for( int i=0 ; i<rows ; i++ )
297  {
298  T2 temp = T2();
299  temp *= 0;
300  for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[m_ppElements[i][j].N];
301  Out.m_pV[i] = temp;
302  }
303  }
304 
305  template<class T>
307  {
308  return Multiply(M);
309  }
310  template<class T>
311  template<class T2>
313  {
314  return Multiply(V);
315  }
316 
317  template<class T>
319  {
320  SparseMatrix<T> M( this->Columns(), this->Rows() );
321 
322  for (int i=0; i<this->Rows(); i++)
323  {
324  for(int ii=0;ii<m_ppElements[i].size();ii++){
325  M(m_ppElements[i][ii].N,i) = m_ppElements[i][ii].Value;
326  }
327  }
328  return M;
329  }
330 
331  template<class T>
332  template<class T2>
333  int SparseMatrix<T>::SolveSymmetric( const SparseMatrix<T>& M , const Vector<T2>& b , int iters , Vector<T2>& solution , const T2 eps , int reset , int threads )
334  {
335  if( reset )
336  {
337  solution.Resize( b.Dimensions() );
338  solution.SetZero();
339  }
340  Vector< T2 > r;
341  r.Resize( solution.Dimensions() );
342  M.Multiply( solution , r );
343  r = b - r;
344  Vector< T2 > d = r;
345  double delta_new , delta_0;
346  for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
347  delta_0 = delta_new;
348  if( delta_new<eps ) return 0;
349  int ii;
350  Vector< T2 > q;
351  q.Resize( d.Dimensions() );
352  for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
353  {
354  M.Multiply( d , q , threads );
355  double dDotQ = 0 , alpha = 0;
356  for( int i=0 ; i<d.Dimensions() ; i++ ) dDotQ += d.m_pV[i] * q.m_pV[i];
357  alpha = delta_new / dDotQ;
358 #pragma omp parallel for num_threads( threads ) schedule( static )
359  for( int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] * T2( alpha );
360  if( !(ii%50) )
361  {
362  r.Resize( solution.Dimensions() );
363  M.Multiply( solution , r , threads );
364  r = b - r;
365  }
366  else
367 #pragma omp parallel for num_threads( threads ) schedule( static )
368  for( int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] - q.m_pV[i] * T2(alpha);
369 
370  double delta_old = delta_new , beta;
371  delta_new = 0;
372  for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i]*r.m_pV[i];
373  beta = delta_new / delta_old;
374 #pragma omp parallel for num_threads( threads ) schedule( static )
375  for( int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] * T2( beta );
376  }
377  return ii;
378  }
379 
380  // Solve for x s.t. M(x)=b by solving for x s.t. M^tM(x)=M^t(b)
381  template<class T>
382  int SparseMatrix<T>::Solve(const SparseMatrix<T>& M,const Vector<T>& b,int iters,Vector<T>& solution,const T eps){
383  SparseMatrix mTranspose=M.Transpose();
384  Vector<T> bb=mTranspose*b;
385  Vector<T> d,r,Md;
386  T alpha,beta,rDotR;
387  int i;
388 
389  solution.Resize(M.Columns());
390  solution.SetZero();
391 
392  d=r=bb;
393  rDotR=r.Dot(r);
394  for(i=0;i<iters && rDotR>eps;i++){
395  T temp;
396  Md=mTranspose*(M*d);
397  alpha=rDotR/d.Dot(Md);
398  solution+=d*alpha;
399  r-=Md*alpha;
400  temp=r.Dot(r);
401  beta=temp/rDotR;
402  rDotR=temp;
403  d=r+d*beta;
404  }
405  return i;
406  }
407 
408 
409 
410 
411  ///////////////////////////
412  // SparseSymmetricMatrix //
413  ///////////////////////////
414  template<class T>
415  template<class T2>
417  template<class T>
418  template<class T2>
420  {
422 
423  for(int i=0; i<SparseMatrix<T>::rows; i++){
424  for(int ii=0;ii<SparseMatrix<T>::rowSizes[i];ii++){
425  int j=SparseMatrix<T>::m_ppElements[i][ii].N;
426  R(i)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[j];
427  R(j)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[i];
428  }
429  }
430  return R;
431  }
432 
433  template<class T>
434  template<class T2>
435  void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , bool addDCTerm ) const
436  {
437  Out.SetZero();
438  const T2* in = &In[0];
439  T2* out = &Out[0];
440  T2 dcTerm = T2( 0 );
441  if( addDCTerm )
442  {
443  for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) dcTerm += in[i];
444  dcTerm /= SparseMatrix<T>::rows;
445  }
446  for( int i=0 ; i<this->SparseMatrix<T>::rows ; i++ )
447  {
449  const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
450  const T2& in_i_ = in[i];
451  T2 out_i = T2(0);
452  for( ; temp!=end ; temp++ )
453  {
454  int j=temp->N;
455  T2 v=temp->Value;
456  out_i += v * in[j];
457  out[j] += v * in_i_;
458  }
459  out[i] += out_i;
460  }
461  if( addDCTerm ) for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) out[i] += dcTerm;
462  }
/** Symmetric multiply Out = this * In using per-thread scratch buffers:
 *  each thread scatters its row range into its own OutScratch[t] buffer
 *  (map), then the buffers are summed into Out (reduce). This avoids atomic
 *  writes for the symmetric out[j] scatter. If addDCTerm, the mean of In is
 *  added to every output component. */
template<class T>
template<class T2>
void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , MapReduceVector< T2 >& OutScratch , bool addDCTerm ) const
{
  int dim = int( In.Dimensions() );
  const T2* in = &In[0];
  int threads = OutScratch.threads();
  if( addDCTerm )
  {
    T2 dcTerm = 0;
#pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm )
    for( int t=0 ; t<threads ; t++ )
    {
      // Map phase: thread t accumulates rows [rows*t/threads, rows*(t+1)/threads).
      T2* out = OutScratch[t];
      memset( out , 0 , sizeof( T2 ) * dim );
      for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
      {
        const T2& in_i_ = in[i];
        T2& out_i_ = out[i];
        for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
        {
          int j = temp->N;
          T2 v = temp->Value;
          out_i_ += v * in[j];
          out[j] += v * in_i_;
        }
        dcTerm += in_i_;
      }
    }
    dcTerm /= dim;
    dim = int( Out.Dimensions() );
    T2* out = &Out[0];
    // Reduce phase: sum the per-thread buffers (plus the DC term) into Out.
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<dim ; i++ )
    {
      T2 _out = dcTerm;
      for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
      out[i] = _out;
    }
  }
  else
  {
#pragma omp parallel for num_threads( threads )
    for( int t=0 ; t<threads ; t++ )
    {
      // Map phase (no DC accumulation).
      T2* out = OutScratch[t];
      memset( out , 0 , sizeof( T2 ) * dim );
      for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
      {
        const T2& in_i_ = in[i];
        T2& out_i_ = out[i];
        for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
        {
          int j = temp->N;
          T2 v = temp->Value;
          out_i_ += v * in[j];
          out[j] += v * in_i_;
        }
      }
    }
    dim = int( Out.Dimensions() );
    T2* out = &Out[0];
    // Reduce phase.
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<dim ; i++ )
    {
      T2 _out = T2(0);
      for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
      out[i] = _out;
    }
  }
}
534  template<class T>
535  template<class T2>
536  void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , std::vector< T2* >& OutScratch , const std::vector< int >& bounds ) const
537  {
538  int dim = In.Dimensions();
539  const T2* in = &In[0];
540  int threads = OutScratch.size();
541 #pragma omp parallel for num_threads( threads )
542  for( int t=0 ; t<threads ; t++ )
543  for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0);
544 #pragma omp parallel for num_threads( threads )
545  for( int t=0 ; t<threads ; t++ )
546  {
547  T2* out = OutScratch[t];
548  for( int i=bounds[t] ; i<bounds[t+1] ; i++ )
549  {
551  const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
552  const T2& in_i_ = in[i];
553  T2& out_i_ = out[i];
554  for( ; temp!=end ; temp++ )
555  {
556  int j = temp->N;
557  T2 v = temp->Value;
558  out_i_ += v * in[j];
559  out[j] += v * in_i_;
560  }
561  }
562  }
563  T2* out = &Out[0];
564 #pragma omp parallel for num_threads( threads ) schedule( static )
565  for( int i=0 ; i<Out.Dimensions() ; i++ )
566  {
567  T2& _out = out[i];
568  _out = T2(0);
569  for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
570  }
571  }
572 #if defined _WIN32 && !defined __MINGW32__
573 #ifndef _AtomicIncrement_
574 #define _AtomicIncrement_
/** Atomically add \a addend to *ptr using a CAS retry loop (Win32 only).
 *  The float bits are aliased through a LONG reference so that
 *  InterlockedCompareExchange can operate on them; the CAS returns the value
 *  previously stored at ptr, which both signals success (== our comparand)
 *  and re-seeds newValue for the next retry.
 *  NOTE(review): the reinterpret-style aliasing of float bits through LONG&
 *  is MSVC-specific type punning -- works on this platform, not portable. */
inline void AtomicIncrement( volatile float* ptr , float addend )
{
  float newValue = *ptr;
  LONG& _newValue = *( (LONG*)&newValue );
  LONG _oldValue;
  for( ;; )
  {
    _oldValue = _newValue;          // snapshot of the value we expect at *ptr
    newValue += addend;             // desired value (expected + addend)
    _newValue = InterlockedCompareExchange( (LONG*) ptr , _newValue , _oldValue );
    if( _newValue==_oldValue ) break;  // CAS succeeded; otherwise retry with fresh *ptr
  }
}
/** Atomically add \a addend to *ptr (double) -- 64-bit variant of the float
 *  CAS loop above, using InterlockedCompareExchange64 over LONGLONG bits. */
inline void AtomicIncrement( volatile double* ptr , double addend )
//inline void AtomicIncrement( double* ptr , double addend )
{
  double newValue = *ptr;
  LONGLONG& _newValue = *( (LONGLONG*)&newValue );
  LONGLONG _oldValue;
  do
  {
    _oldValue = _newValue;          // snapshot of the value we expect at *ptr
    newValue += addend;             // desired value (expected + addend)
    _newValue = InterlockedCompareExchange64( (LONGLONG*) ptr , _newValue , _oldValue );
  }
  while( _newValue!=_oldValue );    // retry until the CAS lands
}
602 #endif // _AtomicIncrement_
/** Out = A * In (symmetric scatter) using atomic adds so the cross-row
 *  out[j] updates are thread-safe. If \a partition is supplied, thread t
 *  handles rows [partition[t], partition[t+1]); otherwise OpenMP splits the
 *  row loop directly. Per-row accumulation into out_i is local and only
 *  committed once, via AtomicIncrement. */
template< class T >
void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< float >& In , Vector< float >& Out , int threads , const int* partition=NULL )
{
  Out.SetZero();
  const float* in = &In[0];
  float* out = &Out[0];
  if( partition )
#pragma omp parallel for num_threads( threads )
    for( int t=0 ; t<threads ; t++ )
      for( int i=partition[t] ; i<partition[t+1] ; i++ )
      {
        const MatrixEntry< T >* temp = A[i];
        const MatrixEntry< T >* end = temp + A.rowSizes[i];
        const float& in_i = in[i];
        float out_i = 0.;
        for( ; temp!=end ; temp++ )
        {
          int j = temp->N;
          float v = temp->Value;
          out_i += v * in[j];
          AtomicIncrement( out+j , v * in_i );  // symmetric counterpart, may race across threads
        }
        AtomicIncrement( out+i , out_i );       // commit this row's local sum once
      }
  else
#pragma omp parallel for num_threads( threads )
    for( int i=0 ; i<A.rows ; i++ )
    {
      const MatrixEntry< T >* temp = A[i];
      const MatrixEntry< T >* end = temp + A.rowSizes[i];
      const float& in_i = in[i];
      float out_i = 0.f;
      for( ; temp!=end ; temp++ )
      {
        int j = temp->N;
        float v = temp->Value;
        out_i += v * in[j];
        AtomicIncrement( out+j , v * in_i );
      }
      AtomicIncrement( out+i , out_i );
    }
}
/** Double-precision variant of MultiplyAtomic above: Out = A * In with
 *  atomic adds for the symmetric out[j] scatter.
 *  NOTE(review): here the entry value is read as `T v` (the matrix's scalar
 *  type) rather than the float used in the single-precision version --
 *  presumably intentional to keep full precision; confirm against upstream. */
template< class T >
void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< double >& In , Vector< double >& Out , int threads , const int* partition=NULL )
{
  Out.SetZero();
  const double* in = &In[0];
  double* out = &Out[0];

  if( partition )
#pragma omp parallel for num_threads( threads )
    for( int t=0 ; t<threads ; t++ )
      for( int i=partition[t] ; i<partition[t+1] ; i++ )
      {
        const MatrixEntry< T >* temp = A[i];
        const MatrixEntry< T >* end = temp + A.rowSizes[i];
        const double& in_i = in[i];
        double out_i = 0.;
        for( ; temp!=end ; temp++ )
        {
          int j = temp->N;
          T v = temp->Value;
          out_i += v * in[j];
          AtomicIncrement( out+j , v * in_i );  // symmetric counterpart, may race across threads
        }
        AtomicIncrement( out+i , out_i );       // commit this row's local sum once
      }
  else
#pragma omp parallel for num_threads( threads )
    for( int i=0 ; i<A.rows ; i++ )
    {
      const MatrixEntry< T >* temp = A[i];
      const MatrixEntry< T >* end = temp + A.rowSizes[i];
      const double& in_i = in[i];
      double out_i = 0.;
      for( ; temp!=end ; temp++ )
      {
        int j = temp->N;
        T v = temp->Value;
        out_i += v * in[j];
        AtomicIncrement( out+j , v * in_i );
      }
      AtomicIncrement( out+i , out_i );
    }
}
688 
/** Conjugate-gradient solve of A x = b using the atomic multiply above.
 *  If solveNormal, solves the normal equations A^2 x = A b instead (A is
 *  symmetric, so A^t = A). eps is squared on entry, so convergence is tested
 *  against eps^2 * |r0|^2. Every 50th iteration the residual is recomputed
 *  from scratch to limit floating-point drift.
 *  @return the number of iterations performed (0 if already converged). */
template< class T >
template< class T2 >
int SparseSymmetricMatrix< T >::SolveAtomic( const SparseSymmetricMatrix< T >& A , const Vector< T2 >& b , int iters , Vector< T2 >& x , T2 eps , int reset , int threads , bool solveNormal )
{
  eps *= eps;
  int dim = b.Dimensions();
  if( reset )
  {
    x.Resize( dim );
    x.SetZero();
  }
  Vector< T2 > r( dim ) , d( dim ) , q( dim );
  Vector< T2 > temp;
  if( solveNormal ) temp.Resize( dim );
  T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
  const T2* _b = &b[0];

  // Build a row partition balanced by entry count: each thread gets roughly
  // eCount/threads matrix entries. Each thread scans independently for its
  // own cut point, so the parallel loop writes disjoint partition slots.
  std::vector< int > partition( threads+1 );
  {
    int eCount = 0;
    for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
    partition[0] = 0;
#pragma omp parallel for num_threads( threads )
    for( int t=0 ; t<threads ; t++ )
    {
      int _eCount = 0;
      for( int i=0 ; i<A.rows ; i++ )
      {
        _eCount += A.rowSizes[i];
        if( _eCount*threads>=eCount*(t+1) )
        {
          partition[t+1] = i;
          break;
        }
      }
    }
    partition[threads] = A.rows;
  }
  // Initial residual: r = (A^t)b - (A^t)A x when solving the normal
  // equations, otherwise r = b - A x; d starts as a copy of r.
  if( solveNormal )
  {
    MultiplyAtomic( A , x , temp , threads , &partition[0] );
    MultiplyAtomic( A , temp , r , threads , &partition[0] );
    MultiplyAtomic( A , b , temp , threads , &partition[0] );
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
  }
  else
  {
    MultiplyAtomic( A , x , r , threads , &partition[0] );
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
  }
  double delta_new = 0 , delta_0;
  for( size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];  // (size_t vs int compare -- dim is non-negative)
  delta_0 = delta_new;
  if( delta_new<eps )
  {
    fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
    return 0;
  }
  int ii;
  for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
  {
    if( solveNormal ) MultiplyAtomic( A , d , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , q , threads , &partition[0] );
    else MultiplyAtomic( A , d , q , threads , &partition[0] );
    double dDotQ = 0;
    for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
    T2 alpha = T2( delta_new / dDotQ );                    // CG step length
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
    if( (ii%50)==(50-1) )
    {
      // Periodically recompute the residual from the updated x to limit drift.
      r.Resize( dim );
      if( solveNormal ) MultiplyAtomic( A , x , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , r , threads , &partition[0] );
      else MultiplyAtomic( A , x , r , threads , &partition[0] );
#pragma omp parallel for num_threads( threads ) schedule( static )
      for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
    }
    else
#pragma omp parallel for num_threads( threads ) schedule( static )
      for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;

    double delta_old = delta_new;
    delta_new = 0;
    for( size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
    T2 beta = T2( delta_new / delta_old );                 // conjugation coefficient
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
  }
  return ii;
}
780 #endif // _WIN32 && !__MINGW32__
781  template< class T >
782  template< class T2 >
783  int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , MapReduceVector< T2 >& scratch , T2 eps , int reset , bool addDCTerm , bool solveNormal )
784  {
785  int threads = scratch.threads();
786  eps *= eps;
787  int dim = int( b.Dimensions() );
788  Vector< T2 > r( dim ) , d( dim ) , q( dim ) , temp;
789  if( reset ) x.Resize( dim );
790  if( solveNormal ) temp.Resize( dim );
791  T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
792  const T2* _b = &b[0];
793 
794  double delta_new = 0 , delta_0;
795  if( solveNormal )
796  {
797  A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm );
798 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
799  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
800  }
801  else
802  {
803  A.Multiply( x , r , scratch , addDCTerm );
804 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
805  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
806  }
807  delta_0 = delta_new;
808  if( delta_new<eps )
809  {
810  fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
811  return 0;
812  }
813  int ii;
814  for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
815  {
816  if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm );
817  else A.Multiply( d , q , scratch , addDCTerm );
818  double dDotQ = 0;
819 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
820  for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
821  T2 alpha = T2( delta_new / dDotQ );
822  double delta_old = delta_new;
823  delta_new = 0;
824  if( (ii%50)==(50-1) )
825  {
826 #pragma omp parallel for num_threads( threads )
827  for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
828  r.Resize( dim );
829  if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm );
830  else A.Multiply( x , r , scratch , addDCTerm );
831 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
832  for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
833  }
834  else
835 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
836  for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
837 
838  T2 beta = T2( delta_new / delta_old );
839 #pragma omp parallel for num_threads( threads )
840  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
841  }
842  return ii;
843  }
844  template< class T >
845  template< class T2 >
846  int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , T2 eps , int reset , int threads , bool addDCTerm , bool solveNormal )
847  {
848  eps *= eps;
849  int dim = int( b.Dimensions() );
850  MapReduceVector< T2 > outScratch;
851  if( threads<1 ) threads = 1;
852  if( threads>1 ) outScratch.resize( threads , dim );
853  if( reset ) x.Resize( dim );
854  Vector< T2 > r( dim ) , d( dim ) , q( dim );
855  Vector< T2 > temp;
856  if( solveNormal ) temp.Resize( dim );
857  T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
858  const T2* _b = &b[0];
859 
860  double delta_new = 0 , delta_0;
861 
862  if( solveNormal )
863  {
864  if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm );
865  else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm ) , A.Multiply( b , temp , addDCTerm );
866 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
867  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
868  }
869  else
870  {
871  if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
872  else A.Multiply( x , r , addDCTerm );
873 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
874  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
875  }
876 
877  delta_0 = delta_new;
878  if( delta_new<eps )
879  {
880  fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
881  return 0;
882  }
883  int ii;
884  for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
885  {
886  if( solveNormal )
887  {
888  if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm );
889  else A.Multiply( d , temp , addDCTerm ) , A.Multiply( temp , q , addDCTerm );
890  }
891  else
892  {
893  if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm );
894  else A.Multiply( d , q , addDCTerm );
895  }
896  double dDotQ = 0;
897 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
898  for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
899  T2 alpha = T2( delta_new / dDotQ );
900  double delta_old = delta_new;
901  delta_new = 0;
902 
903  if( (ii%50)==(50-1) )
904  {
905 #pragma omp parallel for num_threads( threads )
906  for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
907  r.SetZero();
908  if( solveNormal )
909  {
910  if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm );
911  else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm );
912  }
913  else
914  {
915  if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
916  else A.Multiply( x , r , addDCTerm );
917  }
918 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
919  for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
920  }
921  else
922  {
923 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
924  for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
925  }
926 
927  T2 beta = T2( delta_new / delta_old );
928 #pragma omp parallel for num_threads( threads )
929  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
930  }
931  return ii;
932  }
933 
934  template<class T>
935  template<class T2>
936  int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& M , const Vector<T2>& diagonal , const Vector<T2>& b , int iters , Vector<T2>& solution , int reset )
937  {
938  Vector<T2> d,r,Md;
939 
940  if(reset)
941  {
942  solution.Resize(b.Dimensions());
943  solution.SetZero();
944  }
945  Md.Resize(M.rows);
946  for( int i=0 ; i<iters ; i++ )
947  {
948  M.Multiply( solution , Md );
949  r = b-Md;
950  // solution_new[j] * diagonal[j] + ( Md[j] - solution_old[j] * diagonal[j] ) = b[j]
951  // solution_new[j] = ( b[j] - ( Md[j] - solution_old[j] * diagonal[j] ) ) / diagonal[j]
952  // solution_new[j] = ( b[j] - Md[j] ) / diagonal[j] + solution_old[j]
953  for( int j=0 ; j<int(M.rows) ; j++ ) solution[j] += (b[j]-Md[j])/diagonal[j];
954  }
955  return iters;
956  }
957  template< class T >
958  template< class T2 >
960  {
961  diagonal.Resize( SparseMatrix<T>::rows );
962  for( int i=0 ; i<SparseMatrix<T>::rows ; i++ )
963  {
964  diagonal[i] = 0.;
965  for( int j=0 ; j<SparseMatrix<T>::rowSizes[i] ; j++ ) if( SparseMatrix<T>::m_ppElements[i][j].N==i ) diagonal[i] += SparseMatrix<T>::m_ppElements[i][j].Value * 2;
966  }
967  }
968 
969  }
970 }
void Resize(size_t N)
Definition: vector.hpp:63
SparseMatrix< T > & operator*=(const T &V)
void SetRowSize(int row, int count)
SparseMatrix< T > Transpose() const
static int UseAllocator(void)
static int SolveSymmetric(const SparseMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, const T2 eps=1e-8, int reset=1, int threads=1)
static int Solve(const SparseSymmetricMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, T2 eps=1e-8, int reset=1, int threads=0, bool addDCTerm=false, bool solveNormal=false)
MatrixEntry< T > ** m_ppElements
Definition: sparse_matrix.h:66
PCL_EXPORTS void Multiply(const double in1[2], const double in2[2], double out[2])
T Dot(const Vector &V) const
Definition: vector.hpp:237
void getDiagonal(Vector< T2 > &diagonal) const
SparseMatrix< T > operator*(const T &V) const
void write(std::ostream &stream, Type value)
Function for writing data to a stream.
Definition: region_xy.h:64
void resize(int threads, int dim)
static int Solve(const SparseMatrix< T > &M, const Vector< T > &b, int iters, Vector< T > &solution, const T eps=1e-8)
SparseMatrix< T > Multiply(const SparseMatrix< T > &M) const
void read(std::istream &stream, Type &value)
Function for reading data from a stream.
Definition: region_xy.h:47
bool write(FILE *fp) const
Vector< T2 > Multiply(const Vector< T2 > &V) const
size_t Dimensions() const
Definition: vector.hpp:89
Vector< T2 > operator*(const Vector< T2 > &V) const
static void SetAllocator(int blockSize)
SparseMatrix< T > & operator=(const SparseMatrix< T > &M)