Fawkes API Fawkes Development Version
|
00001 00002 /*************************************************************************** 00003 * siftpp.cpp - siftpp based classifier 00004 * 00005 * Created: Sat Apr 12 10:15:23 2008 00006 * Copyright 2008 Stefan Schiffer [stefanschiffer.de] 00007 * 00008 ****************************************************************************/ 00009 00010 /* This program is free software; you can redistribute it and/or modify 00011 * it under the terms of the GNU General Public License as published by 00012 * the Free Software Foundation; either version 2 of the License, or 00013 * (at your option) any later version. A runtime exception applies to 00014 * this software (see LICENSE.GPL_WRE file mentioned below for details). 00015 * 00016 * This program is distributed in the hope that it will be useful, 00017 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00019 * GNU Library General Public License for more details. 00020 * 00021 * Read the full text in the LICENSE.GPL_WRE file in the doc directory. 00022 */ 00023 00024 #include <iostream> 00025 #include <vector> 00026 00027 #include <classifiers/siftpp.h> 00028 00029 //#ifdef SIFTPP_TIMETRACKER 00030 #include <utils/time/clock.h> 00031 #include <utils/time/tracker.h> 00032 //#endif 00033 00034 #include <core/exception.h> 00035 #include <core/exceptions/software.h> 00036 #include <fvutils/color/colorspaces.h> 00037 #include <fvutils/color/conversions.h> 00038 #include <fvutils/readers/png.h> 00039 //#include <fvutils/writers/pnm.h> 00040 //#include <fvutils/writers/png.h> 00041 00042 //using namespace fawkes; 00043 using namespace fawkes; 00044 00045 namespace firevision { 00046 #if 0 /* just to make Emacs auto-indent happy */ 00047 } 00048 #endif 00049 00050 /** @class SiftppClassifier <classifiers/siftpp.h> 00051 * SIFTPP classifier. 00052 * 00053 * This class provides a classifier that uses SIFTPP to detect objects in a given 00054 * image by matching features. The objects are reported back as regions of interest. 00055 * Each ROI contains an object. ROIs with 11x11 are matched features. 00056 * 00057 * This code uses siftpp from http://vision.ucla.edu/~vedaldi/code/siftpp/siftpp.html 00058 * and is partly based on code from their package. 00059 * 00060 * @author Stefan Schiffer 00061 */ 00062 00063 /** Constructor. 00064 * @param object_file file that contains an image of the object to detect 00065 * @param samplingStep Initial sampling step 00066 * @param octaves Number of analysed octaves 00067 * @param levels Number of levels per octave 00068 * @param magnif Keypoint magnification (default = 3) 00069 * @param noorient rotation invariance (0) or upright (1) 00070 * @param unnormalized Normalization of features (default 0) 00071 */ 00072 SiftppClassifier::SiftppClassifier( const char * object_file, 00073 int samplingStep, int octaves, int levels, 00074 float magnif, int noorient, int unnormalized) 00075 : Classifier("SiftppClassifier") 00076 { 00077 // params for FastHessian 00078 __samplingStep = samplingStep; 00079 __octaves = octaves; 00080 __levels = levels; 00081 // params for Descriptors 00082 __first = -1 ; 00083 __threshold = 0.04f / __levels / 2.0f ; 00084 __edgeThreshold = 10.0f; 00085 __magnif = magnif; 00086 __noorient = noorient; 00087 __unnormalized = unnormalized; 00088 00089 // descriptor vector length 00090 __vlen = 128; 00091 00092 00093 //#ifdef SIFTPP_TIMETRACKER 00094 __tt = new TimeTracker(); 00095 __loop_count = 0; 00096 __ttc_objconv = __tt->add_class("ObjectConvert"); 00097 __ttc_objfeat = __tt->add_class("ObjectFeatures"); 00098 __ttc_imgconv = __tt->add_class("ImageConvert"); 00099 __ttc_imgfeat = __tt->add_class("ImageFeatures"); 00100 __ttc_matchin = __tt->add_class("Matching"); 00101 __ttc_roimerg = __tt->add_class("MergeROIs"); 00102 //#endif 00103 00104 //#ifdef SIFTPP_TIMETRACKER 00105 __tt->ping_start(__ttc_objconv); 00106 //#endif 00107 00108 PNGReader pngr( object_file ); 00109 unsigned char* buf = malloc_buffer( pngr.colorspace(), pngr.pixel_width(), pngr.pixel_height() ); 00110 pngr.set_buffer( buf ); 00111 pngr.read(); 00112 00113 unsigned int lwidth = pngr.pixel_width(); 00114 unsigned int lheight = pngr.pixel_height(); 00115 VL::pixel_t * im_pt = new VL::pixel_t [lwidth * lheight ]; 00116 VL::pixel_t * start = im_pt; 00117 //VL::pixel_t* end = start + lwidth*lheight ; 00118 for (unsigned int h = 0; h < lheight; ++h) { 00119 for (unsigned int w = 0; w < lwidth ; ++w) { 00120 int i = (buf[h * lwidth + w] ); 00121 VL::pixel_t norm = VL::pixel_t( 255 ); 00122 *start++ = VL::pixel_t( i ) / norm; 00123 } 00124 } 00125 // make image 00126 __obj_img = new VL::PgmBuffer(); 00127 __obj_img->width = lwidth; 00128 __obj_img->height = lheight; 00129 __obj_img->data = im_pt; 00130 00131 if ( ! __obj_img ) { 00132 throw Exception("Could not load object file"); 00133 } 00134 00135 //#ifdef SIFTPP_TIMETRACKER 00136 __tt->ping_end(__ttc_objconv); 00137 //#endif 00138 00139 // save object image for debugging 00140 // 00141 00142 //#ifdef SIFTPP_TIMETRACKER 00143 __tt->ping_start(__ttc_objfeat); 00144 //#endif 00145 00146 // COMPUTE OBJECT FEATURES 00147 __obj_features.clear(); 00148 //__obj_features.reserve(1000); 00149 __obj_num_features = 0; 00150 00151 __sigman = .5 ; 00152 __sigma0 = 1.6 * powf(2.0f, 1.0f / __levels) ; 00153 00154 std::cout << "SiftppClassifier(ctor): init scalespace" << std::endl; 00155 // initialize scalespace 00156 VL::Sift sift(__obj_img->data, __obj_img->width, __obj_img->height, 00157 __sigman, __sigma0, __octaves, __levels, __first, -1, __levels+1) ; 00158 00159 std::cout << "SiftppClassifier(ctor): detect object keypoints" << std::endl; 00160 // Run SIFTPP detector 00161 sift.detectKeypoints(__threshold, __edgeThreshold) ; 00162 // Number of keypoints 00163 __obj_num_features = sift.keypointsEnd() - sift.keypointsBegin(); 00164 std::cout << "SiftppClassifier(ctor): computed '" << __obj_num_features << "' object-keypoints" << std::endl; 00165 00166 // set descriptor options 00167 sift.setNormalizeDescriptor( ! __unnormalized ) ; 00168 sift.setMagnification( __magnif ) ; 00169 00170 std::cout << "SiftppClassifier(ctor): run detector, compute ori and des ..." << std::endl; 00171 // Run detector, compute orientations and descriptors 00172 for( VL::Sift::KeypointsConstIter iter = sift.keypointsBegin() ; 00173 iter != sift.keypointsEnd() ; ++iter ) { 00174 00175 //Feature * feat = new Feature(); 00176 Feature feat; 00177 00178 //std::cout << "SiftppClassifier(ctor): saving keypoint" << std::endl; 00179 feat.key = (*iter); 00180 00181 // detect orientations 00182 VL::float_t angles [4] ; 00183 int nangles ; 00184 if( ! __noorient ) { 00185 nangles = sift.computeKeypointOrientations(angles, *iter) ; 00186 } else { 00187 nangles = 1; 00188 angles[0] = VL::float_t(0) ; 00189 } 00190 feat.number_of_desc = nangles; 00191 feat.descs = new VL::float_t*[nangles]; 00192 00193 //std::cout << "SiftppClassifier(ctor): computing '" << nangles << "' descriptors" << std::endl; 00194 // compute descriptors 00195 for(int a = 0 ; a < nangles ; ++a) { 00196 // out << setprecision(2) << iter->x << ' ' << setprecision(2) << iter->y << ' ' 00197 // << setprecision(2) << iter->sigma << ' ' << setprecision(3) << angles[a] ; 00198 // compute descriptor 00199 feat.descs[a] = new VL::float_t[__vlen]; 00200 sift.computeKeypointDescriptor(feat.descs[a], *iter, angles[a]) ; 00201 } // next angle 00202 //std::cout << "SiftppClassifier(ctor): computed '" << feat.number_of_desc << "' descriptors." << std::endl; 00203 00204 // save feature 00205 __obj_features.push_back( feat ); 00206 00207 } // next keypoint 00208 00209 __obj_num_features = __obj_features.size(); 00210 if ( ! __obj_num_features > 0 ) { 00211 throw Exception("Could not compute object features"); 00212 } 00213 std::cout << "SiftppClassifier(ctor): computed '" << __obj_num_features << "' features from object" << std::endl; 00214 00215 //#ifdef SIFTPP_TIMETRACKER 00216 __tt->ping_end(__ttc_objfeat); 00217 //#endif 00218 00219 } 00220 00221 00222 /** Destructor. */ 00223 SiftppClassifier::~SiftppClassifier() 00224 { 00225 // 00226 delete __obj_img; 00227 __obj_features.clear(); 00228 // 00229 //delete __image; 00230 __img_features.clear(); 00231 } 00232 00233 00234 std::list< ROI > * 00235 SiftppClassifier::classify() 00236 { 00237 //#ifdef SIFTPP_TIMETRACKER 00238 __tt->ping_start(0); 00239 //#endif 00240 00241 // list of ROIs to return 00242 std::list< ROI > *rv = new std::list< ROI >(); 00243 00244 // for ROI calculation 00245 int x_min = _width; 00246 int y_min = _height; 00247 int x_max = 0; 00248 int y_max = 0; 00249 00250 //#ifdef SIFTPP_TIMETRACKER 00251 __tt->ping_start(__ttc_imgconv); 00252 //#endif 00253 std::cout << "SiftppClassifier(classify): copy imgdat to SIFTPP Image" << std::endl; 00254 00255 VL::pixel_t * im_pt = new VL::pixel_t [_width * _height ]; 00256 VL::pixel_t * start = im_pt; 00257 for (unsigned int h = 0; h < _height; ++h) { 00258 for (unsigned int w = 0; w < _width ; ++w) { 00259 int i = (_src[h * _width + w] ); 00260 VL::pixel_t norm = VL::pixel_t( 255 ); 00261 *start++ = VL::pixel_t( i ) / norm; 00262 } 00263 } 00264 // make image 00265 __image = new VL::PgmBuffer(); 00266 __image->width = _width; 00267 __image->height = _height; 00268 __image->data = im_pt; 00269 00270 //#ifdef SIFTPP_TIMETRACKER 00271 __tt->ping_end(__ttc_imgconv); 00272 //#endif 00273 00274 /// Write image to verify correct operation 00275 // nothing yet 00276 00277 //#ifdef SIFTPP_TIMETRACKER 00278 __tt->ping_start(__ttc_imgfeat); 00279 //#endif 00280 00281 // COMPUTE IMAGE FEATURES 00282 __img_features.clear(); 00283 __img_num_features = 0; 00284 //__img_features.reserve(1000); 00285 00286 std::cout << "SiftppClassifier(classify): init scalespace" << std::endl; 00287 // initialize scalespace 00288 VL::Sift sift(__image->data, __image->width, __image->height, 00289 __sigman, __sigma0, __octaves, __levels, __first, -1, __levels+1) ; 00290 00291 std::cout << "SiftppClassifier(classify): detect image keypoints" << std::endl; 00292 // Run SIFTPP detector 00293 sift.detectKeypoints(__threshold, __edgeThreshold) ; 00294 00295 // Number of keypoints 00296 __img_num_features = sift.keypointsEnd() - sift.keypointsBegin(); 00297 std::cout << "SiftppClassifier(classify): Extracted '" << __img_num_features << "' image keypoints" << std::endl; 00298 00299 // set descriptor options 00300 sift.setNormalizeDescriptor( ! __unnormalized ) ; 00301 sift.setMagnification( __magnif ) ; 00302 00303 std::cout << "SiftppClassifier(classify): run detector, compute ori and des ..." << std::endl; 00304 // Run detector, compute orientations and descriptors 00305 for( VL::Sift::KeypointsConstIter iter = sift.keypointsBegin() ; 00306 iter != sift.keypointsEnd() ; ++iter ) { 00307 00308 Feature feat; // = new Feature(); 00309 00310 //std::cout << "SiftppClassifier(classify): saving keypoint" << std::endl; 00311 feat.key = (*iter); 00312 00313 //std::cout << "SiftppClassifier(classify): detect orientations" << std::endl; 00314 // detect orientations 00315 VL::float_t angles [4] ; 00316 int nangles ; 00317 if( ! __noorient ) { 00318 nangles = sift.computeKeypointOrientations(angles, *iter) ; 00319 } else { 00320 nangles = 1; 00321 angles[0] = VL::float_t(0) ; 00322 } 00323 feat.number_of_desc = nangles; 00324 feat.descs = new VL::float_t*[nangles]; 00325 00326 //std::cout << "SiftppClassifier(classify): computing '" << nangles << "' descriptors" << std::endl; 00327 // compute descriptors 00328 for(int a = 0 ; a < nangles ; ++a) { 00329 // compute descriptor 00330 feat.descs[a] = new VL::float_t[__vlen] ; 00331 sift.computeKeypointDescriptor(feat.descs[a], *iter, angles[a]) ; 00332 } // next angle 00333 //std::cout << "SiftppClassifier(classify): computed '" << feat.number_of_desc << "' descriptors." << std::endl; 00334 00335 // save feature 00336 __img_features.push_back( feat ); 00337 00338 } // next keypoint 00339 00340 // Number of feature 00341 __img_num_features = __img_features.size(); 00342 00343 //#ifdef SIFTPP_TIMETRACKER 00344 __tt->ping_end(__ttc_imgfeat); 00345 //#endif 00346 00347 std::cout << "SiftppClassifier(classify): Extracted '" << __img_num_features << "' image features" << std::endl; 00348 00349 //#ifdef SIFTPP_TIMETRACKER 00350 __tt->ping_start(__ttc_matchin); 00351 //#endif 00352 std::cout << "SiftppClassifier(classify): matching ..." << std::endl; 00353 00354 std::vector< int > matches(__obj_features.size()); 00355 int m = 0; 00356 for (unsigned i = 0; i < __obj_features.size(); i++) { 00357 int match = findMatch(__obj_features[i], __img_features); 00358 matches[i] = match; 00359 if (match != -1) { 00360 std::cout << "SiftppClassifier(classify): Matched feature " << i << " in object image with feature " << match << " in image." << std::endl; 00361 /// adding feature-ROI 00362 ROI r( (int)(__img_features[matches[i]].key.x)-5, (int)(__img_features[matches[i]].key.y )-5, 11, 11, _width, _height); 00363 rv->push_back(r); 00364 // increment feature-match-count 00365 ++m; 00366 } 00367 } 00368 00369 //#ifdef SIFTPP_TIMETRACKER 00370 __tt->ping_end(__ttc_matchin); 00371 //#endif 00372 std::cout << "SiftppClassifier(classify) matched '" << m << "' of '" << __obj_features.size() << "' features in scene." << std::endl; 00373 00374 std::cout << "SiftppClassifier(classify): computing ROI" << std::endl; 00375 //#ifdef SIFTPP_TIMETRACKER 00376 __tt->ping_start(__ttc_roimerg); 00377 //#endif 00378 00379 for (unsigned i = 0; i < matches.size(); i++) { 00380 if (matches[i] != -1) { 00381 if( (int)__img_features[matches[i]].key.x < x_min ) 00382 x_min = (int)__img_features[matches[i]].key.x; 00383 if( (int)__img_features[matches[i]].key.y < y_min ) 00384 y_min = (int)__img_features[matches[i]].key.y; 00385 if( (int)__img_features[matches[i]].key.x > x_max ) 00386 x_max = (int)__img_features[matches[i]].key.x; 00387 if( (int)__img_features[matches[i]].key.y > y_max ) 00388 y_max = (int)__img_features[matches[i]].key.y; 00389 } 00390 } 00391 if( m != 0 ) { 00392 ROI r(x_min, y_min, x_max-x_min, y_max-y_min, _width, _height); 00393 rv->push_back(r); 00394 } 00395 00396 //#ifdef SIFTPP_TIMETRACKER 00397 __tt->ping_end(__ttc_roimerg); 00398 //#endif 00399 00400 //#ifdef SIFTPP_TIMETRACKER 00401 __tt->ping_end(0); 00402 //#endif 00403 00404 //#ifdef SIFTPP_TIMETRACKER 00405 // print timetracker statistics 00406 __tt->print_to_stdout(); 00407 //#endif 00408 00409 delete __image; 00410 00411 std::cout << "SiftppClassifier(classify): done ... returning '" << rv->size() << "' ROIs." << std::endl; 00412 return rv; 00413 } 00414 00415 int 00416 SiftppClassifier::findMatch(const Feature & ip1, const std::vector< Feature > & ipts) { 00417 double mind = 1e100, second = 1e100; 00418 int match = -1; 00419 00420 for (unsigned i = 0; i < ipts.size(); i++) { 00421 00422 if (ipts[i].number_of_desc != ip1.number_of_desc) 00423 continue; 00424 //std::cout << "SiftppClassifier(findMatch): number_of_desc matched!" << std::endl; 00425 for ( int j = 0; j < ip1.number_of_desc; ++j ) { 00426 double d = distSquare(ipts[i].descs[j], ip1.descs[j], __vlen); 00427 00428 if (d < mind) { 00429 second = mind; 00430 mind = d; 00431 match = i; 00432 } else if (d < second) { 00433 second = d; 00434 } 00435 } 00436 } 00437 00438 if (mind < 0.5 * second) 00439 return match; 00440 00441 return -1; 00442 } 00443 00444 00445 double 00446 SiftppClassifier::distSquare(VL::float_t *v1, VL::float_t *v2, int n) { 00447 double dsq = 0.; 00448 while (n--) { 00449 dsq += (v1[n-1] - v2[n-1]) * (v1[n-1] - v2[n-1]); 00450 } 00451 //std::cout << " dsq: '" << dsq << "'" << std::endl; 00452 return dsq; 00453 } 00454 00455 } // end namespace firevision