STXXL 1.2.1

priority_queue.h

00001 /***************************************************************************
00002  *  include/stxxl/bits/containers/priority_queue.h
00003  *
00004  *  Part of the STXXL. See http://stxxl.sourceforge.net
00005  *
00006  *  Copyright (C) 1999 Peter Sanders <sanders@mpi-sb.mpg.de>
00007  *  Copyright (C) 2003, 2004, 2007 Roman Dementiev <dementiev@mpi-sb.mpg.de>
00008  *  Copyright (C) 2007 Johannes Singler <singler@ira.uka.de>
00009  *  Copyright (C) 2007, 2008 Andreas Beckmann <beckmann@cs.uni-frankfurt.de>
00010  *
00011  *  Distributed under the Boost Software License, Version 1.0.
00012  *  (See accompanying file LICENSE_1_0.txt or copy at
00013  *  http://www.boost.org/LICENSE_1_0.txt)
00014  **************************************************************************/
00015 
00016 #ifndef STXXL_PRIORITY_QUEUE_HEADER
00017 #define STXXL_PRIORITY_QUEUE_HEADER
00018 
00019 #include <list>
00020 #include <iterator>
00021 #include <vector>
00022 
00023 #include <stxxl/bits/mng/mng.h>
00024 #include <stxxl/bits/mng/prefetch_pool.h>
00025 #include <stxxl/bits/mng/write_pool.h>
00026 #include <stxxl/bits/common/tmeta.h>
00027 
00028 
00029 __STXXL_BEGIN_NAMESPACE
00030 
00034 
00037 namespace priority_queue_local
00038 {
00045     template <typename _Tp, typename _Sequence = std::vector<_Tp>,
00046               typename _Compare = std::less<typename _Sequence::value_type> >
00047     class internal_priority_queue
00048     {
00049         // concept requirements
00050         typedef typename _Sequence::value_type _Sequence_value_type;
00051 
00052     public:
00053         typedef typename _Sequence::value_type value_type;
00054         typedef typename _Sequence::reference reference;
00055         typedef typename _Sequence::const_reference const_reference;
00056         typedef typename _Sequence::size_type size_type;
00057         typedef          _Sequence container_type;
00058 
00059     protected:
00060         //  See queue::c for notes on these names.
00061         _Sequence c;
00062         _Compare comp;
00063         size_type N;
00064 
00065     public:
00069         explicit
00070         internal_priority_queue(size_type capacity)
00071             : c(capacity), N(0)
00072         { }
00073 
00077         bool
00078         empty() const
00079         { return N == 0; }
00080 
00082         size_type
00083         size() const
00084         { return N; }
00085 
00090         const_reference
00091         top() const
00092         {
00093             return c.front();
00094         }
00095 
00104         void
00105         push(const value_type & __x)
00106         {
00107             c[N] = __x;
00108             ++N;
00109             std::push_heap(c.begin(), c.begin() + N, comp);
00110         }
00111 
00123         void
00124         pop()
00125         {
00126             std::pop_heap(c.begin(), c.begin() + N, comp);
00127             --N;
00128         }
00129 
00133         void sort_to(value_type * target)
00134         {
00135             std::sort(c.begin(), c.begin() + N, comp);
00136             std::reverse_copy(c.begin(), c.begin() + N, target);
00137         }
00138 
00142         void clear()
00143         {
00144             N = 0;
00145         }
00146     };
00147 
00148 
00150 // auxiliary functions
00151 
00152 // merge sz element from the two sentinel terminated input
00153 // sequences from0 and from1 to "to"
00154 // advance from0 and from1 accordingly
00155 // require: at least sz nonsentinel elements available in from0, from1
00156 // require: to may overwrite one of the sources as long as
00157 //   *(fromx + sz) is before the end of fromx
00158     template <class InputIterator, class OutputIterator, class Cmp_>
00159     void merge_iterator(
00160         InputIterator & from0,
00161         InputIterator & from1,
00162         OutputIterator to, unsigned_type sz, Cmp_ cmp)
00163     {
00164         OutputIterator done = to + sz;
00165 
00166         while (to != done)
00167         {
00168             if (cmp(*from0, *from1))
00169             {
00170                 *to = *from1;
00171                 ++from1;
00172             }
00173             else
00174             {
00175                 *to = *from0;
00176                 ++from0;
00177             }
00178             ++to;
00179         }
00180     }
00181 
// merge sz element from the three sentinel terminated input
// sequences from0, from1 and from2 to "to"
// advance from0, from1 and from2 accordingly
// require: at least sz nonsentinel elements available in from0, from1 and from2
// require: to may overwrite one of the sources as long as
//   *(fromx + sz) is before the end of fromx
    template <class InputIterator, class OutputIterator, class Cmp_>
    void merge3_iterator(
        InputIterator & from0,
        InputIterator & from1,
        InputIterator & from2,
        OutputIterator to, unsigned_type sz, Cmp_ cmp)
    {
        OutputIterator done = to + sz;

        // Dispatch to the state whose label s<a><b><c> encodes the current
        // ordering of the three sequence heads: 'a' is the sequence to be
        // output next, then 'b', then 'c' (cmp(x, y) == true meaning y
        // precedes x, as in merge_iterator above).
        if (cmp(*from1, *from0)) {
            if (cmp(*from2, *from1)) {
                goto s012;
            }
            else {
                if (cmp(*from0, *from2)) {
                    goto s201;
                }
                else {
                    goto s021;
                }
            }
        } else {
            if (cmp(*from2, *from1)) {
                if (cmp(*from2, *from0)) {
                    goto s102;
                }
                else {
                    goto s120;
                }
            } else {
                goto s210;
            }
        }

// State s<a><b><c>: output the head of sequence 'a', advance it, then jump
// to the state matching the new head ordering; the relative order of 'b'
// before 'c' is an invariant, so only 'a' has to be re-ranked.
#define Merge3Case(a, b, c) \
    s ## a ## b ## c : \
    if (to == done) \
        return;\
    *to = *from ## a; \
    ++to; \
    ++from ## a; \
    if (cmp(*from ## b, *from ## a)) \
        goto s ## a ## b ## c;\
    if (cmp(*from ## c, *from ## a)) \
        goto s ## b ## a ## c;\
    goto s ## b ## c ## a;

        // the order is chosen in such a way that
        // four of the trailing gotos can be eliminated by the optimizer
        Merge3Case(0, 1, 2);
        Merge3Case(1, 2, 0);
        Merge3Case(2, 0, 1);
        Merge3Case(1, 0, 2);
        Merge3Case(0, 2, 1);
        Merge3Case(2, 1, 0);

#undef Merge3Case
    }
00246 
00247 
// merge sz element from the four sentinel terminated input
// sequences from0, from1, from2 and from3 to "to"
// advance from0, from1, from2 and from3 accordingly
// require: at least sz nonsentinel elements available in from0, from1, from2 and from3
// require: to may overwrite one of the sources as long as
//   *(fromx + sz) is before the end of fromx
    template <class InputIterator, class OutputIterator, class Cmp_>
    void merge4_iterator(
        InputIterator & from0,
        InputIterator & from1,
        InputIterator & from2,
        InputIterator & from3,
        OutputIterator to, unsigned_type sz, Cmp_ cmp)
    {
        OutputIterator done = to + sz;

// Enter state s<a><b><c><d> if the four heads are currently ordered
// a, b, c, d (cmp(x, y) == true meaning y precedes x); with ties several
// permutations may qualify and the first matching goto wins.
#define StartMerge4(a, b, c, d) \
    if ((!cmp(*from ## a, *from ## b)) && (!cmp(*from ## b, *from ## c)) && (!cmp(*from ## c, *from ## d))) \
        goto s ## a ## b ## c ## d;

        // b>a c>b d>c
        // a<b b<c c<d
        // a<=b b<=c c<=d
        // !(a>b) !(b>c) !(c>d)

        StartMerge4(0, 1, 2, 3);
        StartMerge4(1, 2, 3, 0);
        StartMerge4(2, 3, 0, 1);
        StartMerge4(3, 0, 1, 2);

        StartMerge4(0, 3, 1, 2);
        StartMerge4(3, 1, 2, 0);
        StartMerge4(1, 2, 0, 3);
        StartMerge4(2, 0, 3, 1);

        StartMerge4(0, 2, 3, 1);
        StartMerge4(2, 3, 1, 0);
        StartMerge4(3, 1, 0, 2);
        StartMerge4(1, 0, 2, 3);

        StartMerge4(2, 0, 1, 3);
        StartMerge4(0, 1, 3, 2);
        StartMerge4(1, 3, 2, 0);
        StartMerge4(3, 2, 0, 1);

        StartMerge4(3, 0, 2, 1);
        StartMerge4(0, 2, 1, 3);
        StartMerge4(2, 1, 3, 0);
        StartMerge4(1, 3, 0, 2);

        StartMerge4(1, 0, 3, 2);
        StartMerge4(0, 3, 2, 1);
        StartMerge4(3, 2, 1, 0);
        StartMerge4(2, 1, 0, 3);

// State s<a><b><c><d>: output the head of sequence 'a', advance it, then
// locate the new head's rank among b, c, d with two comparisons (the
// relative order of b, c, d is an invariant) and jump to that state.
#define Merge4Case(a, b, c, d) \
    s ## a ## b ## c ## d : \
    if (to == done) \
        return;\
    *to = *from ## a; \
    ++to; \
    ++from ## a; \
    if (cmp(*from ## c, *from ## a)) \
    { \
        if (cmp(*from ## b, *from ## a)) \
            goto s ## a ## b ## c ## d;\
        else \
            goto s ## b ## a ## c ## d;\
    } \
    else \
    { \
        if (cmp(*from ## d, *from ## a)) \
            goto s ## b ## c ## a ## d;\
        else \
            goto s ## b ## c ## d ## a;\
    }

        Merge4Case(0, 1, 2, 3);
        Merge4Case(1, 2, 3, 0);
        Merge4Case(2, 3, 0, 1);
        Merge4Case(3, 0, 1, 2);

        Merge4Case(0, 3, 1, 2);
        Merge4Case(3, 1, 2, 0);
        Merge4Case(1, 2, 0, 3);
        Merge4Case(2, 0, 3, 1);

        Merge4Case(0, 2, 3, 1);
        Merge4Case(2, 3, 1, 0);
        Merge4Case(3, 1, 0, 2);
        Merge4Case(1, 0, 2, 3);

        Merge4Case(2, 0, 1, 3);
        Merge4Case(0, 1, 3, 2);
        Merge4Case(1, 3, 2, 0);
        Merge4Case(3, 2, 0, 1);

        Merge4Case(3, 0, 2, 1);
        Merge4Case(0, 2, 1, 3);
        Merge4Case(2, 1, 3, 0);
        Merge4Case(1, 3, 0, 2);

        Merge4Case(1, 0, 3, 2);
        Merge4Case(0, 3, 2, 1);
        Merge4Case(3, 2, 1, 0);
        Merge4Case(2, 1, 0, 3);

#undef StartMerge4
#undef Merge4Case
    }
00358 
00359 
00365     template <typename Tp_, unsigned_type max_size_>
00366     class internal_bounded_stack
00367     {
00368         typedef Tp_ value_type;
00369         typedef unsigned_type size_type;
00370         enum { max_size = max_size_ };
00371 
00372         size_type size_;
00373         value_type array[max_size];
00374 
00375     public:
00376         internal_bounded_stack() : size_(0) { }
00377 
00378         void push(const value_type & x)
00379         {
00380             assert(size_ < max_size);
00381             array[size_++] = x;
00382         }
00383 
00384         const value_type & top() const
00385         {
00386             assert(size_ > 0);
00387             return array[size_ - 1];
00388         }
00389 
00390         void pop()
00391         {
00392             assert(size_ > 0);
00393             --size_;
00394         }
00395 
00396         void clear()
00397         {
00398             size_ = 0;
00399         }
00400 
00401         size_type size() const
00402         {
00403             return size_;
00404         }
00405 
00406         bool empty() const
00407         {
00408             return size_ == 0;
00409         }
00410     };
00411 
00412 
00417     template <class BlockType_,
00418               class Cmp_,
00419               unsigned Arity_,
00420               class AllocStr_ = STXXL_DEFAULT_ALLOC_STRATEGY>
00421     class ext_merger : private noncopyable
00422     {
00423     public:
00424         typedef stxxl::uint64 size_type;
00425         typedef BlockType_ block_type;
00426         typedef typename block_type::bid_type bid_type;
00427         typedef typename block_type::value_type value_type;
00428         typedef Cmp_ comparator_type;
00429         typedef AllocStr_ alloc_strategy;
00430         typedef value_type Element;
00431         typedef typed_block<sizeof(value_type), value_type> sentinel_block_type;
00432 
00433         // KNKMAX / 2  <  arity  <=  KNKMAX
00434         enum { arity = Arity_, KNKMAX = 1UL << (LOG2<Arity_>::ceil) };
00435 
        //! Reinterpret the one-element sentinel block as a block_type
        //! pointer so it can be stored in sequence_state::block; only
        //! element [0] of the result is ever valid to access.
        block_type * convert_block_pointer(sentinel_block_type * arg)
        {
            return reinterpret_cast<block_type *>(arg);
        }
00440 
00441     protected:
00442         comparator_type cmp;
00443 
        //! True iff \c a compares equivalent to the comparator's minimum
        //! value, i.e. it is a sequence-terminating sentinel.
        bool is_sentinel(const Element & a) const
        {
            return !(cmp(cmp.min_value(), a)); // a <= cmp.min_value()
        }
00448 
        //! True iff \c a is a regular element (strictly above the
        //! comparator's minimum/sentinel value).
        bool not_sentinel(const Element & a) const
        {
            return cmp(cmp.min_value(), a); // a > cmp.min_value()
        }
00453 
        // State of one input sequence of the merger: the block currently
        // held in memory, the read position within it, and the list of
        // on-disk blocks still to come.  Dereferencing yields the current
        // element; operator++ advances and transparently reads the next
        // block from disk when the current one is exhausted.
        struct sequence_state : private noncopyable
        {
            unsigned_type current;      //current index
            block_type * block;         //current block
            std::list<bid_type> * bids; //list of blocks forming this sequence
            comparator_type cmp;
            ext_merger * merger;        //back pointer to the owning merger (for its pools)
            bool allocated;

            //! Current element of the sequence.
            const value_type & operator * () const
            {
                return (*block)[current];
            }

            sequence_state() : bids(NULL), allocated(false)
            { }

            ~sequence_state()
            {
                STXXL_VERBOSE2("ext_merger sequence_state::~sequence_state()");
                if (bids != NULL)
                {
                    // return the not-yet-read blocks of this sequence
                    // to the block manager
                    block_manager * bm = block_manager::get_instance();
                    bm->delete_blocks(bids->begin(), bids->end());
                    delete bids;
                }
            }

            //! Turn this sequence into an empty one: position 0 holds
            //! only the sentinel value.
            void make_inf()
            {
                current = 0;
                (*block)[0] = cmp.min_value();
            }

            //! True iff \c a equals the sentinel (comparator's minimum value).
            bool is_sentinel(const Element & a) const
            {
                return !(cmp(cmp.min_value(), a));
            }

            //! True iff \c a is a regular (non-sentinel) element.
            bool not_sentinel(const Element & a) const
            {
                return cmp(cmp.min_value(), a);
            }

            //! Exchange contents with \c obj; both must belong to the
            //! same merger (the back pointer is not swapped).
            void swap(sequence_state & obj)
            {
                if (&obj != this)
                {
                    std::swap(current, obj.current);
                    std::swap(block, obj.block);
                    std::swap(bids, obj.bids);
                    assert(merger == obj.merger);
                    std::swap(allocated, obj.allocated);
                }
            }

            //! Advance to the next element; on crossing a block border the
            //! next block is read synchronously from the prefetch pool and
            //! the block after it is hinted.
            sequence_state & operator ++ ()
            {
                assert(not_sentinel((*block)[current]));
                assert(current < block->size);

                ++current;

                if (current == block->size)
                {
                    STXXL_VERBOSE2("ext_merger sequence_state operator++ crossing block border ");
                    // go to the next block
                    assert(bids != NULL);
                    if (bids->empty()) // if there is no next block
                    {
                        STXXL_VERBOSE2("ext_merger sequence_state operator++ it was the last block in the sequence ");
                        delete bids;
                        bids = NULL;
                        make_inf();
                    }
                    else
                    {
                        STXXL_VERBOSE2("ext_merger sequence_state operator++ there is another block ");
                        bid_type bid = bids->front();
                        bids->pop_front();
                        if (!(bids->empty()))
                        {
                            STXXL_VERBOSE2("ext_merger sequence_state operator++ one more block exists in a sequence: " <<
                                           "flushing this block in write cache (if not written yet) and giving hint to prefetcher");
                            bid_type next_bid = bids->front();
                            //Hint next block of sequence.
                            //This is mandatory to ensure proper synchronization between prefetch pool and write pool.
                            merger->p_pool->hint(next_bid, *(merger->w_pool));
                        }
                        merger->p_pool->read(block, bid)->wait();
                        STXXL_VERBOSE2("first element of read block " << bid << " " << *(block->begin()) << " cached in " << block);
                        // the block has been consumed into memory, its disk
                        // space can be recycled
                        block_manager::get_instance()->delete_block(bid);
                        current = 0;
                    }
                }
                return *this;
            }
        };
00553 
00554 
        // Node of the loser tree: the key that lost the comparison at this
        // node together with the index of the segment it came from
        // (entry[0] stores the overall winner instead).
        struct Entry
        {
            value_type key;      // Key of Loser element (winner for 0)
            unsigned_type index; // the number of losing segment
        };
00561 
00562         size_type size_;         // total number of elements stored
00563         unsigned_type logK;      // log of current tree size
00564         unsigned_type k;         // invariant (k == 1 << logK), always a power of two
00565         // only entries 0 .. arity-1 may hold actual sequences, the other
00566         // entries arity .. KNKMAX-1 are sentinels to make the size of the tree
00567         // a power of two always
00568 
00569         // stack of empty segment indices
00570         internal_bounded_stack<unsigned_type, arity> free_segments;
00571 
00572         // upper levels of loser trees
00573         // entry[0] contains the winner info
00574         Entry entry[KNKMAX];
00575 
00576         // leaf information
00577         // note that Knuth uses indices k..k-1
00578         // while we use 0..k-1
00579         sequence_state states[KNKMAX]; // sequence including current position, dereference gives current element
00580 
00581         prefetch_pool<block_type> * p_pool;
00582         write_pool<block_type> * w_pool;
00583 
00584         sentinel_block_type sentinel_block;
00585 
00586     public:
        //! Construct without pools; set_pools() must be called before any
        //! sequence I/O is performed.
        ext_merger() :
            size_(0), logK(0), k(1), p_pool(0), w_pool(0)
        {
            init();
        }
00592 
        //! Construct with the prefetch and write pools used for
        //! overlapping block I/O.
        ext_merger(prefetch_pool<block_type> * p_pool_,
                   write_pool<block_type> * w_pool_) :
            size_(0), logK(0), k(1),
            p_pool(p_pool_),
            w_pool(w_pool_)
        {
            init();
        }
00601 
        //! Free the \c arity blocks allocated in init(); the remaining
        //! states alias the shared sentinel block and are not owned.
        virtual ~ext_merger()
        {
            STXXL_VERBOSE1("ext_merger::~ext_merger()");
            for (unsigned_type i = 0; i < arity; ++i)
            {
                delete states[i].block;
            }
        }
00610 
        //! Set (or replace) the prefetch and write pools used for
        //! overlapping block I/O.
        void set_pools(prefetch_pool<block_type> * p_pool_,
                       write_pool<block_type> * w_pool_)
        {
            p_pool = p_pool_;
            w_pool = w_pool_;
        }
00617 
00618     private:
        // One-time setup shared by both constructors: allocate the real
        // blocks for entries < arity, alias the remaining entries to the
        // shared sentinel block, and build the initial (k == 1) loser tree
        // with its single free segment.
        void init()
        {
            STXXL_VERBOSE2("ext_merger::init()");
            assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering

            sentinel_block[0] = cmp.min_value();

            for (unsigned_type i = 0; i < KNKMAX; ++i)
            {
                states[i].merger = this;
                if (i < arity)
                    states[i].block = new block_type;
                else
                    // entries >= arity never hold real data, so they all
                    // share the single one-element sentinel block
                    states[i].block = convert_block_pointer(&(sentinel_block));

                states[i].make_inf();
            }

            assert(k == 1);
            free_segments.push(0); //total state: one free sequence

            rebuildLoserTree();
            assert(is_sentinel(*states[entry[0].index]));
        }
00643 
        // rebuild loser tree information from the values in current
        // segment positions; entry[0] receives the overall winner
        void rebuildLoserTree()
        {
            unsigned_type winner = initWinner(1);
            entry[0].index = winner;
            entry[0].key = *(states[winner]);
        }
00651 
00652 
        // given any values in the leaves this
        // routine recomputes upper levels of the tree
        // from scratch in linear time
        // initialize entry[root].index and the subtree rooted there
        // return winner index
        unsigned_type initWinner(unsigned_type root)
        {
            if (root >= k) { // leaf reached
                // leaves are conceptually numbered k..2k-1; map back to segment index
                return root - k;
            } else {
                unsigned_type left = initWinner(2 * root);
                unsigned_type right = initWinner(2 * root + 1);
                Element lk = *(states[left]);
                Element rk = *(states[right]);
                if (!(cmp(lk, rk))) { // right subtree loses
                    // store the loser at this node, propagate the winner up
                    entry[root].index = right;
                    entry[root].key = rk;
                    return left;
                } else {
                    entry[root].index = left;
                    entry[root].key = lk;
                    return right;
                }
            }
        }
00678 
        // first go up the tree all the way to the root
        // hand down old winner for the respective subtree
        // based on new value, and old winner and loser
        // update each node on the path to the root top down.
        // This is implemented recursively
        void update_on_insert(
            unsigned_type node,
            const Element & newKey,
            unsigned_type newIndex,
            Element * winnerKey,
            unsigned_type * winnerIndex,        // old winner
            unsigned_type * mask)               // 1 << (ceil(log KNK) - dist-from-root)
        {
            if (node == 0) {                    // winner part of root
                *mask = 1 << (logK - 1);
                *winnerKey = entry[0].key;
                *winnerIndex = entry[0].index;
                if (cmp(entry[node].key, newKey))
                {
                    // new key beats the old overall winner
                    entry[node].key = newKey;
                    entry[node].index = newIndex;
                }
            } else {
                // recurse to the root first so updates happen top-down
                update_on_insert(node >> 1, newKey, newIndex, winnerKey, winnerIndex, mask);
                Element loserKey = entry[node].key;
                unsigned_type loserIndex = entry[node].index;
                if ((*winnerIndex & *mask) != (newIndex & *mask)) { // different subtrees
                    if (cmp(loserKey, newKey)) {                    // newKey will have influence here
                        if (cmp(*winnerKey, newKey)) {              // old winner loses here
                            entry[node].key = *winnerKey;
                            entry[node].index = *winnerIndex;
                        } else {                                    // new entry loses here
                            entry[node].key = newKey;
                            entry[node].index = newIndex;
                        }
                    }
                    *winnerKey = loserKey;
                    *winnerIndex = loserIndex;
                }
                // note that nothing needs to be done if
                // the winner came from the same subtree
                // a) newKey <= winnerKey => even more reason for the other tree to lose
                // b) newKey >  winnerKey => the old winner will beat the new
                //                           entry further down the tree
                // also the same old winner is handed down the tree

                *mask >>= 1; // next level
            }
        }
00728 
        // make the tree two times as wide
        // (the newly created segments are initialized as empty and, while
        // their index is below arity, registered on the free stack)
        void doubleK()
        {
            STXXL_VERBOSE1("ext_merger::doubleK (before) k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " arity=" << arity << " #free=" << free_segments.size());
            assert(k > 0);
            assert(k < arity);
            assert(free_segments.empty());                 // stack was free (probably not needed)

            // make all new entries free
            // and push them on the free stack
            for (unsigned_type i = 2 * k - 1; i >= k; i--) //backwards
            {
                states[i].make_inf();
                if (i < arity)
                    free_segments.push(i);
            }

            // double the size
            k *= 2;
            logK++;

            STXXL_VERBOSE1("ext_merger::doubleK (after)  k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " arity=" << arity << " #free=" << free_segments.size());
            assert(!free_segments.empty());

            // recompute loser tree information
            rebuildLoserTree();
        }
00756 
00757 
        // compact nonempty segments in the left half of the tree
        // (then shrink k as far as possible and rebuild the free stack
        // and the loser tree)
        void compactTree()
        {
            STXXL_VERBOSE1("ext_merger::compactTree (before) k=" << k << " logK=" << logK << " #free=" << free_segments.size());
            assert(logK > 0);

            // compact all nonempty segments to the left

            unsigned_type to = 0;
            for (unsigned_type from = 0; from < k; from++)
            {
                if (!is_segment_empty(from))
                {
                    assert(is_segment_allocated(from));
                    if (from != to) {
                        assert(!is_segment_allocated(to));
                        states[to].swap(states[from]);
                    }
                    ++to;
                }
            }

            // half degree as often as possible
            while (k > 1 && to <= (k / 2)) {
                k /= 2;
                logK--;
            }

            // overwrite garbage and compact the stack of free segment indices
            free_segments.clear(); // none free
            for ( ;  to < k;  to++) {
                assert(!is_segment_allocated(to));
                states[to].make_inf();
                if (to < arity)
                    free_segments.push(to);
            }

            STXXL_VERBOSE1("ext_merger::compactTree (after)  k=" << k << " logK=" << logK << " #free=" << free_segments.size());
            assert(k > 0);

            // recompute loser tree information
            rebuildLoserTree();
        }
00801 
00802 
#if 0
        // NOTE(review): disabled member-wise swap; the pool pointers are
        // deliberately left un-swapped (see commented lines below) —
        // confirm the states[]' back pointers as well before enabling.
        void swap(ext_merger & obj)
        {
            std::swap(cmp, obj.cmp);
            std::swap(free_segments, obj.free_segments);
            std::swap(size_, obj.size_);
            std::swap(logK, obj.logK);
            std::swap(k, obj.k);
            swap_1D_arrays(entry, obj.entry, KNKMAX);
            swap_1D_arrays(states, obj.states, KNKMAX);

            // std::swap(p_pool,obj.p_pool);
            // std::swap(w_pool,obj.w_pool);
        }
#endif
00818 
00819     public:
        //! Approximate internal memory consumption: the \c arity internal
        //! blocks; auxiliary structures are not counted.
        unsigned_type mem_cons() const // only rough estimation
        {
            return (arity * block_type::raw_size);
        }
00824 
00825         // delete the (length = end-begin) smallest elements and write them to "begin..end"
00826         // empty segments are deallocated
00827         // require:
00828         // - there are at least length elements
00829         // - segments are ended by sentinels
00830         template <class OutputIterator>
00831         void multi_merge(OutputIterator begin, OutputIterator end)
00832         {
00833             size_type length = end - begin;
00834 
00835             STXXL_VERBOSE1("ext_merger::multi_merge from " << k << " sequence(s), length = " << length);
00836 
00837             if (length == 0)
00838                 return;
00839 
00840             assert(k > 0);
00841             assert(length <= size_);
00842 
00843             //Hint first non-internal (actually second) block of each sequence.
00844             //This is mandatory to ensure proper synchronization between prefetch pool and write pool.
00845             for (unsigned_type i = 0; i < k; ++i)
00846             {
00847                 if (states[i].bids != NULL && !states[i].bids->empty())
00848                     p_pool->hint(states[i].bids->front(), *w_pool);
00849             }
00850 
00851             switch (logK) {
00852             case 0:
00853                 assert(k == 1);
00854                 assert(entry[0].index == 0);
00855                 assert(free_segments.empty());
00856                 //memcpy(to, states[0], length * sizeof(Element));
00857                 //std::copy(states[0],states[0]+length,to);
00858                 for (size_type i = 0; i < length; ++i, ++(states[0]), ++begin)
00859                     *begin = *(states[0]);
00860 
00861                 entry[0].key = **states;
00862                 if (is_segment_empty(0))
00863                     deallocate_segment(0);
00864 
00865                 break;
00866             case 1:
00867                 assert(k == 2);
00868                 merge_iterator(states[0], states[1], begin, length, cmp);
00869                 rebuildLoserTree();
00870                 if (is_segment_empty(0) && is_segment_allocated(0))
00871                     deallocate_segment(0);
00872 
00873                 if (is_segment_empty(1) && is_segment_allocated(1))
00874                     deallocate_segment(1);
00875 
00876                 break;
00877             case 2:
00878                 assert(k == 4);
00879                 if (is_segment_empty(3))
00880                     merge3_iterator(states[0], states[1], states[2], begin, length, cmp);
00881                 else
00882                     merge4_iterator(states[0], states[1], states[2], states[3], begin, length, cmp);
00883                 rebuildLoserTree();
00884                 if (is_segment_empty(0) && is_segment_allocated(0))
00885                     deallocate_segment(0);
00886 
00887                 if (is_segment_empty(1) && is_segment_allocated(1))
00888                     deallocate_segment(1);
00889 
00890                 if (is_segment_empty(2) && is_segment_allocated(2))
00891                     deallocate_segment(2);
00892 
00893                 if (is_segment_empty(3) && is_segment_allocated(3))
00894                     deallocate_segment(3);
00895 
00896                 break;
00897             case  3: multi_merge_f<OutputIterator, 3>(begin, end);
00898                 break;
00899             case  4: multi_merge_f<OutputIterator, 4>(begin, end);
00900                 break;
00901             case  5: multi_merge_f<OutputIterator, 5>(begin, end);
00902                 break;
00903             case  6: multi_merge_f<OutputIterator, 6>(begin, end);
00904                 break;
00905             case  7: multi_merge_f<OutputIterator, 7>(begin, end);
00906                 break;
00907             case  8: multi_merge_f<OutputIterator, 8>(begin, end);
00908                 break;
00909             case  9: multi_merge_f<OutputIterator, 9>(begin, end);
00910                 break;
00911             case 10: multi_merge_f<OutputIterator, 10>(begin, end);
00912                 break;
00913             default: multi_merge_k(begin, end);
00914                 break;
00915             }
00916 
00917 
00918             size_ -= length;
00919 
00920             // compact tree if it got considerably smaller
00921             {
00922                 const unsigned_type num_segments_used = std::min<unsigned_type>(arity, k) - free_segments.size();
00923                 const unsigned_type num_segments_trigger = k - (3 * k / 5);
00924                 // using k/2 would be worst case inefficient (for large k)
00925                 // for k \in {2, 4, 8} the trigger is k/2 which is good
00926                 // because we have special mergers for k \in {1, 2, 4}
00927                 // there is also a special 3-way-merger, that will be
00928                 // triggered if k == 4 && is_segment_empty(3)
00929                 STXXL_VERBOSE3("ext_merger  compact? k=" << k << " #used=" << num_segments_used
00930                                                          << " <= #trigger=" << num_segments_trigger << " ==> "
00931                                                          << ((k > 1 && num_segments_used <= num_segments_trigger) ? "yes" : "no ")
00932                                                          << " || "
00933                                                          << ((k == 4 && !free_segments.empty() && !is_segment_empty(3)) ? "yes" : "no ")
00934                                                          << " #free=" << free_segments.size());
00935                 if (k > 1 && ((num_segments_used <= num_segments_trigger) ||
00936                               (k == 4 && !free_segments.empty() && !is_segment_empty(3))))
00937                 {
00938                     compactTree();
00939                 }
00940             }
00941         }
00942 
00943     private:
00944         // multi-merge for arbitrary K
00945         template <class OutputIterator>
00946         void multi_merge_k(OutputIterator begin, OutputIterator end)
00947         {
00948             Entry * currentPos;
00949             Element currentKey;
00950             unsigned_type currentIndex; // leaf pointed to by current entry
00951             unsigned_type kReg = k;
00952             OutputIterator done = end;
00953             OutputIterator to = begin;
00954             unsigned_type winnerIndex = entry[0].index;
00955             Element winnerKey = entry[0].key;
00956 
00957             while (to != done)
00958             {
00959                 // write result
00960                 *to = *(states[winnerIndex]);
00961 
00962                 // advance winner segment
00963                 ++(states[winnerIndex]);
00964 
00965                 winnerKey = *(states[winnerIndex]);
00966 
00967                 // remove winner segment if empty now
00968                 if (is_sentinel(winnerKey)) //
00969                     deallocate_segment(winnerIndex);
00970 
00971 
00972                 // go up the entry-tree
00973                 for (unsigned_type i = (winnerIndex + kReg) >> 1;  i > 0;  i >>= 1) {
00974                     currentPos = entry + i;
00975                     currentKey = currentPos->key;
00976                     if (cmp(winnerKey, currentKey)) {
00977                         currentIndex = currentPos->index;
00978                         currentPos->key = winnerKey;
00979                         currentPos->index = winnerIndex;
00980                         winnerKey = currentKey;
00981                         winnerIndex = currentIndex;
00982                     }
00983                 }
00984 
00985                 ++to;
00986             }
00987             entry[0].index = winnerIndex;
00988             entry[0].key = winnerKey;
00989         }
00990 
        // multi-merge with a compile-time fan-in of (1 << LogK): the
        // loser-tree update is fully unrolled via the TreeStep macro so the
        // compiler sees a fixed sequence of at most LogK comparison steps
        // instead of a data-dependent loop
        template <class OutputIterator, unsigned LogK>
        void multi_merge_f(OutputIterator begin, OutputIterator end)
        {
            OutputIterator done = end;
            OutputIterator to = begin;
            unsigned_type winnerIndex = entry[0].index;
            Entry * regEntry = entry;               // local alias of the tree nodes
            sequence_state * regStates = states;    // local alias of the leaves
            Element winnerKey = entry[0].key;

            // the actual tree must be at least as large as the unrolled code assumes
            assert(logK >= LogK);
            while (to != done)
            {
                // write result
                *to = *(regStates[winnerIndex]);

                // advance winner segment
                ++(regStates[winnerIndex]);

                winnerKey = *(regStates[winnerIndex]);


                // remove winner segment if empty now
                if (is_sentinel(winnerKey))
                    deallocate_segment(winnerIndex);


                ++to;

                // update loser tree: TreeStep(L) handles tree level L, comparing
                // the running winner against the stored loser and swapping them
                // when the loser is smaller; levels above LogK compile to nothing
#define TreeStep(L) \
    if (1 << LogK >= 1 << L) { \
        Entry * pos ## L = regEntry + ((winnerIndex + (1 << LogK)) >> (((int(LogK - L) + 1) >= 0) ? ((LogK - L) + 1) : 0)); \
        Element key ## L = pos ## L->key; \
        if (cmp(winnerKey, key ## L)) { \
            unsigned_type index ## L = pos ## L->index; \
            pos ## L->key = winnerKey; \
            pos ## L->index = winnerIndex; \
            winnerKey = key ## L; \
            winnerIndex = index ## L; \
        } \
    }
                TreeStep(10);
                TreeStep(9);
                TreeStep(8);
                TreeStep(7);
                TreeStep(6);
                TreeStep(5);
                TreeStep(4);
                TreeStep(3);
                TreeStep(2);
                TreeStep(1);
#undef TreeStep
            }
            regEntry[0].index = winnerIndex;
            regEntry[0].key = winnerKey;
        }
01048 
01049     public:
01050         bool spaceIsAvailable() const // for new segment
01051         {
01052             return k < arity || !free_segments.empty();
01053         }
01054 
01055 
        // insert segment beginning at to
        // drains another_merger's smallest segment_size elements into a new
        // external segment: the partially filled first block stays in memory,
        // all remaining (full) blocks are written to disk via the write pool
        // require: spaceIsAvailable() == 1
        template <class Merger>
        void insert_segment(Merger & another_merger, size_type segment_size)
        {
            STXXL_VERBOSE1("ext_merger::insert_segment(merger,...)" << this);

            if (segment_size > 0)
            {
                // get a free slot
                if (free_segments.empty()) { // tree is too small
                    doubleK();
                }
                assert(!free_segments.empty());
                unsigned_type free_slot = free_segments.top();
                free_segments.pop();


                // link new segment
                assert(segment_size);
                unsigned_type nblocks = segment_size / block_type::size;
                //assert(nblocks); // at least one block
                STXXL_VERBOSE1("ext_merger::insert_segment nblocks=" << nblocks);
                if (nblocks == 0)
                {
                    STXXL_VERBOSE1("ext_merger::insert_segment(merger,...) WARNING: inserting a segment with " <<
                                   nblocks << " blocks");
                    STXXL_VERBOSE1("THIS IS INEFFICIENT: TRY TO CHANGE PRIORITY QUEUE PARAMETERS");
                }
                // number of valid elements in the (partial) first block;
                // an exact multiple means the first block is completely full
                unsigned_type first_size = segment_size % block_type::size;
                if (first_size == 0)
                {
                    first_size = block_type::size;
                    --nblocks;
                }
                block_manager * bm = block_manager::get_instance();
                std::list<bid_type> * bids = new std::list<bid_type>(nblocks);
                bm->new_blocks(alloc_strategy(), bids->begin(), bids->end());
                block_type * first_block = new block_type;

                // fill the first block from its back end so that its last
                // first_size elements are the valid ones
                another_merger.multi_merge(
                    first_block->begin() + (block_type::size - first_size),
                    first_block->end());

                STXXL_VERBOSE1("last element of first block " << *(first_block->end() - 1));
                assert(!cmp(*(first_block->begin() + (block_type::size - first_size)), *(first_block->end() - 1)));

                assert(w_pool->size() > 0);

                // merge the remaining data block-wise and post each block
                // to disk through the write pool
                for (typename std::list<bid_type>::iterator curbid = bids->begin(); curbid != bids->end(); ++curbid)
                {
                    block_type * b = w_pool->steal();
                    another_merger.multi_merge(b->begin(), b->end());
                    STXXL_VERBOSE1("first element of following block " << *curbid << " " << *(b->begin()));
                    STXXL_VERBOSE1("last element of following block " << *curbid << " " << *(b->end() - 1));
                    assert(!cmp(*(b->begin()), *(b->end() - 1)));
                    w_pool->write(b, *curbid); //->wait() does not help
                    STXXL_VERBOSE1("written to block " << *curbid << " cached in " << b);
                }

                // link the bid list and the first block into slot free_slot
                insert_segment(bids, first_block, first_size, free_slot);

                size_ += segment_size;

                // propagate new information up the tree
                Element dummyKey;
                unsigned_type dummyIndex;
                unsigned_type dummyMask;
                update_on_insert((free_slot + k) >> 1, *(states[free_slot]), free_slot,
                                 &dummyKey, &dummyIndex, &dummyMask);
            } else {
                // deallocate memory ?
                STXXL_VERBOSE1("Merged segment with zero size.");
            }
        }
01131 
01132         size_type size() const { return size_; }
01133 
01134     protected:
01137         void insert_segment(std::list<bid_type> * bidlist, block_type * first_block,
01138                             unsigned_type first_size, unsigned_type slot)
01139         {
01140             STXXL_VERBOSE1("ext_merger::insert_segment(bidlist,...) " << this << " " << bidlist->size() << " " << slot);
01141             assert(!is_segment_allocated(slot));
01142             assert(first_size > 0);
01143 
01144             sequence_state & new_sequence = states[slot];
01145             new_sequence.current = block_type::size - first_size;
01146             std::swap(new_sequence.block, first_block);
01147             delete first_block;
01148             std::swap(new_sequence.bids, bidlist);
01149             if (bidlist) // the old list
01150             {
01151                 assert(bidlist->empty());
01152                 delete bidlist;
01153             }
01154             new_sequence.allocated = true;
01155             assert(is_segment_allocated(slot));
01156         }
01157 
01158         // free an empty segment .
01159         void deallocate_segment(unsigned_type slot)
01160         {
01161             STXXL_VERBOSE1("ext_merger::deallocate_segment() deleting segment " << slot << " allocated=" << int(is_segment_allocated(slot)));
01162             assert(is_segment_allocated(slot));
01163             states[slot].allocated = false;
01164             states[slot].make_inf();
01165 
01166             // push on the stack of free segment indices
01167             free_segments.push(slot);
01168         }
01169 
01170         // is this segment empty ?
01171         bool is_segment_empty(unsigned_type slot) const
01172         {
01173             return is_sentinel(*(states[slot]));
01174         }
01175 
01176         // Is this segment allocated? Otherwise it's empty,
01177         // already on the stack of free segment indices and can be reused.
01178         bool is_segment_allocated(unsigned_type slot) const
01179         {
01180             return states[slot].allocated;
01181         }
01182     }; //ext_merger
01183 
01184 
// Loser tree for merging up to KNKMAX sorted in-memory sequences.
// The data structure is from Knuth, "Sorting and Searching", Section 5.4.1
    template <class ValTp_, class Cmp_, unsigned KNKMAX>
    class loser_tree : private noncopyable
    {
    public:
        typedef ValTp_ value_type;
        typedef Cmp_ comparator_type;
        typedef value_type Element;

    private:
        // one internal node of the loser tree
        struct Entry
        {
            value_type key;      // Key of Loser element (winner for 0)
            unsigned_type index; // number of losing segment
        };

        comparator_type cmp;
        // stack of free segment indices
        internal_bounded_stack<unsigned_type, KNKMAX> free_segments;

        unsigned_type size_; // total number of elements stored
        unsigned_type logK;  // log of current tree size
        unsigned_type k;     // invariant (k == 1 << logK), always a power of two

        Element sentinel;    // target of free segment pointers

        // upper levels of loser trees
        // entry[0] contains the winner info
        Entry entry[KNKMAX];

        // leaf information
        // note that Knuth uses indices k..2k-1
        // while we use 0..k-1
        Element * current[KNKMAX];          // pointer to current element
        Element * segment[KNKMAX];          // start of Segments
        unsigned_type segment_size[KNKMAX]; // just to count the internal memory consumption

        // bytes currently held by the owned segment arrays
        unsigned_type mem_cons_;

        // private member functions
        unsigned_type initWinner(unsigned_type root);
        void update_on_insert(unsigned_type node, const Element & newKey, unsigned_type newIndex,
                              Element * winnerKey, unsigned_type * winnerIndex, unsigned_type * mask);
        void deallocate_segment(unsigned_type slot);
        void doubleK();
        void compactTree();
        void rebuildLoserTree();
        bool is_segment_empty(unsigned_type slot);
        void multi_merge_k(Element * to, unsigned_type length);

        // multi-merge with a compile-time fan-in of (1 << LogK): the
        // loser-tree update is fully unrolled via the TreeStep macro
        template <unsigned LogK>
        void multi_merge_f(Element * to, unsigned_type length)
        {
            //Entry *currentPos;
            //Element currentKey;
            //int currentIndex; // leaf pointed to by current entry
            Element * done = to + length;
            Entry * regEntry = entry;        // local alias of the tree nodes
            Element ** regStates = current;  // local alias of the leaves
            unsigned_type winnerIndex = regEntry[0].index;
            Element winnerKey = regEntry[0].key;
            Element * winnerPos;
            //Element sup = sentinel; // supremum

            // the actual tree must be at least as large as the unrolled code assumes
            assert(logK >= LogK);
            while (to != done)
            {
                winnerPos = regStates[winnerIndex];

                // write result
                *to = winnerKey;

                // advance winner segment
                ++winnerPos;
                regStates[winnerIndex] = winnerPos;
                winnerKey = *winnerPos;

                // remove winner segment if empty now
                if (is_sentinel(winnerKey))
                {
                    deallocate_segment(winnerIndex);
                }
                ++to;

                // update loser tree: TreeStep(L) handles tree level L,
                // swapping the running winner with a smaller stored loser;
                // levels above LogK compile to nothing
#define TreeStep(L) \
    if (1 << LogK >= 1 << L) { \
        Entry * pos ## L = regEntry + ((winnerIndex + (1 << LogK)) >> (((int(LogK - L) + 1) >= 0) ? ((LogK - L) + 1) : 0)); \
        Element key ## L = pos ## L->key; \
        if (cmp(winnerKey, key ## L)) { \
            unsigned_type index ## L = pos ## L->index; \
            pos ## L->key = winnerKey; \
            pos ## L->index = winnerIndex; \
            winnerKey = key ## L; \
            winnerIndex = index ## L; \
        } \
    }
                TreeStep(10);
                TreeStep(9);
                TreeStep(8);
                TreeStep(7);
                TreeStep(6);
                TreeStep(5);
                TreeStep(4);
                TreeStep(3);
                TreeStep(2);
                TreeStep(1);
#undef TreeStep
            }
            regEntry[0].index = winnerIndex;
            regEntry[0].key = winnerKey;
        }

    public:
        // an element is a sentinel iff it does not compare greater
        // than the comparator's min_value()
        bool is_sentinel(const Element & a)
        {
            return !(cmp(cmp.min_value(), a));
        }
        bool not_sentinel(const Element & a)
        {
            return cmp(cmp.min_value(), a);
        }

    public:
        loser_tree();
        ~loser_tree();
        void init();

        void swap(loser_tree & obj)
        {
            std::swap(cmp, obj.cmp);
            std::swap(free_segments, obj.free_segments);
            std::swap(size_, obj.size_);
            std::swap(logK, obj.logK);
            std::swap(k, obj.k);
            std::swap(sentinel, obj.sentinel);
            swap_1D_arrays(entry, obj.entry, KNKMAX);
            swap_1D_arrays(current, obj.current, KNKMAX);
            swap_1D_arrays(segment, obj.segment, KNKMAX);
            swap_1D_arrays(segment_size, obj.segment_size, KNKMAX);
            std::swap(mem_cons_, obj.mem_cons_);
        }

        // merge the smallest (end - begin) elements into [begin, end)
        void multi_merge(Element * begin, Element * end)
        {
            multi_merge(begin, end - begin);
        }
        void multi_merge(Element *, unsigned_type length);

        // internal memory consumption in bytes
        unsigned_type mem_cons() const { return mem_cons_; }

        bool spaceIsAvailable() const // for new segment
        {
            return k < KNKMAX || !free_segments.empty();
        }

        void insert_segment(Element * to, unsigned_type sz); // insert segment beginning at to
        unsigned_type size() { return size_; }
    };
01349 
01351     template <class ValTp_, class Cmp_, unsigned KNKMAX>
01352     loser_tree<ValTp_, Cmp_, KNKMAX>::loser_tree() : size_(0), logK(0), k(1), mem_cons_(0)
01353     {
01354         free_segments.push(0);
01355         segment[0] = 0;
01356         current[0] = &sentinel;
01357         // entry and sentinel are initialized by init
01358         // since they need the value of supremum
01359         init();
01360     }
01361 
01362     template <class ValTp_, class Cmp_, unsigned KNKMAX>
01363     void loser_tree<ValTp_, Cmp_, KNKMAX>::init()
01364     {
01365         assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering
01366         sentinel = cmp.min_value();
01367         rebuildLoserTree();
01368         assert(current[entry[0].index] == &sentinel);
01369     }
01370 
01371 
01372 // rebuild loser tree information from the values in current
01373     template <class ValTp_, class Cmp_, unsigned KNKMAX>
01374     void loser_tree<ValTp_, Cmp_, KNKMAX>::rebuildLoserTree()
01375     {
01376         assert(LOG2<KNKMAX>::floor == LOG2<KNKMAX>::ceil); // KNKMAX needs to be a power of two
01377         unsigned_type winner = initWinner(1);
01378         entry[0].index = winner;
01379         entry[0].key = *(current[winner]);
01380     }
01381 
01382 
01383 // given any values in the leaves this
01384 // routing recomputes upper levels of the tree
01385 // from scratch in linear time
01386 // initialize entry[root].index and the subtree rooted there
01387 // return winner index
01388     template <class ValTp_, class Cmp_, unsigned KNKMAX>
01389     unsigned_type loser_tree<ValTp_, Cmp_, KNKMAX>::initWinner(unsigned_type root)
01390     {
01391         if (root >= k) { // leaf reached
01392             return root - k;
01393         } else {
01394             unsigned_type left = initWinner(2 * root);
01395             unsigned_type right = initWinner(2 * root + 1);
01396             Element lk = *(current[left]);
01397             Element rk = *(current[right]);
01398             if (!(cmp(lk, rk))) { // right subtree looses
01399                 entry[root].index = right;
01400                 entry[root].key = rk;
01401                 return left;
01402             } else {
01403                 entry[root].index = left;
01404                 entry[root].key = lk;
01405                 return right;
01406             }
01407         }
01408     }
01409 
01410 
// first go up the tree all the way to the root
// hand down old winner for the respective subtree
// based on new value, and old winner and loser
// update each node on the path to the root top down.
// This is implemented recursively
    template <class ValTp_, class Cmp_, unsigned KNKMAX>
    void loser_tree<ValTp_, Cmp_, KNKMAX>::update_on_insert(
        unsigned_type node,
        const Element & newKey,
        unsigned_type newIndex,
        Element * winnerKey,
        unsigned_type * winnerIndex,       // old winner
        unsigned_type * mask)              // 1 << (ceil(log KNK) - dist-from-root)
    {
        if (node == 0) {                   // winner part of root
            *mask = 1 << (logK - 1);
            // remember the old winner before possibly replacing it
            *winnerKey = entry[0].key;
            *winnerIndex = entry[0].index;
            // the new key takes over the root if it beats the old winner
            if (cmp(entry[node].key, newKey))
            {
                entry[node].key = newKey;
                entry[node].index = newIndex;
            }
        } else {
            // first update the path above this node (recursion towards root)
            update_on_insert(node >> 1, newKey, newIndex, winnerKey, winnerIndex, mask);
            Element loserKey = entry[node].key;
            unsigned_type loserIndex = entry[node].index;
            if ((*winnerIndex & *mask) != (newIndex & *mask)) { // different subtrees
                if (cmp(loserKey, newKey)) {                    // newKey will have influence here
                    if (cmp(*winnerKey, newKey)) {              // old winner loses here
                        entry[node].key = *winnerKey;
                        entry[node].index = *winnerIndex;
                    } else {                                    // new entry looses here
                        entry[node].key = newKey;
                        entry[node].index = newIndex;
                    }
                }
                // the stored loser becomes the winner handed further down
                *winnerKey = loserKey;
                *winnerIndex = loserIndex;
            }
            // note that nothing needs to be done if
            // the winner came from the same subtree
            // a) newKey <= winnerKey => even more reason for the other tree to loose
            // b) newKey >  winnerKey => the old winner will beat the new
            //                           entry further down the tree
            // also the same old winner is handed down the tree

            *mask >>= 1; // next level
        }
    }
01461 
01462 
01463 // make the tree two times as wide
01464     template <class ValTp_, class Cmp_, unsigned KNKMAX>
01465     void loser_tree<ValTp_, Cmp_, KNKMAX>::doubleK()
01466     {
01467         STXXL_VERBOSE3("loser_tree::doubleK (before) k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " #free=" << free_segments.size());
01468         assert(k > 0);
01469         assert(k < KNKMAX);
01470         assert(free_segments.empty());                   // stack was free (probably not needed)
01471 
01472         // make all new entries free
01473         // and push them on the free stack
01474         for (unsigned_type i = 2 * k - 1;  i >= k;  i--) // backwards
01475         {
01476             current[i] = &sentinel;
01477             segment[i] = NULL;
01478             free_segments.push(i);
01479         }
01480 
01481         // double the size
01482         k *= 2;
01483         logK++;
01484 
01485         STXXL_VERBOSE3("loser_tree::doubleK (after)  k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " #free=" << free_segments.size());
01486         assert(!free_segments.empty());
01487 
01488         // recompute loser tree information
01489         rebuildLoserTree();
01490     }
01491 
01492 
01493 // compact nonempty segments in the left half of the tree
01494     template <class ValTp_, class Cmp_, unsigned KNKMAX>
01495     void loser_tree<ValTp_, Cmp_, KNKMAX>::compactTree()
01496     {
01497         STXXL_VERBOSE3("loser_tree::compactTree (before) k=" << k << " logK=" << logK << " #free=" << free_segments.size());
01498         assert(logK > 0);
01499 
01500         // compact all nonempty segments to the left
01501         unsigned_type from = 0;
01502         unsigned_type to = 0;
01503         for ( ;  from < k;  from++)
01504         {
01505             if (not_sentinel(*(current[from])))
01506             {
01507                 segment_size[to] = segment_size[from];
01508                 current[to] = current[from];
01509                 segment[to] = segment[from];
01510                 to++;
01511             }/*
01512                 else
01513                 {
01514                 if(segment[from])
01515                 {
01516                 STXXL_VERBOSE2("loser_tree::compactTree() deleting segment "<<from<<
01517                                         " address: "<<segment[from]<<" size: "<<segment_size[from]);
01518                 delete [] segment[from];
01519                 segment[from] = 0;
01520                 mem_cons_ -= segment_size[from];
01521                 }
01522                 }*/
01523         }
01524 
01525         // half degree as often as possible
01526         while (k > 1 && to <= (k / 2)) {
01527             k /= 2;
01528             logK--;
01529         }
01530 
01531         // overwrite garbage and compact the stack of free segment indices
01532         free_segments.clear(); // none free
01533         for ( ;  to < k;  to++) {
01534             current[to] = &sentinel;
01535             free_segments.push(to);
01536         }
01537 
01538         STXXL_VERBOSE3("loser_tree::compactTree (after)  k=" << k << " logK=" << logK << " #free=" << free_segments.size());
01539 
01540         // recompute loser tree information
01541         rebuildLoserTree();
01542     }
01543 
01544 
01545 // insert segment beginning at to
01546 // require: spaceIsAvailable() == 1
01547     template <class ValTp_, class Cmp_, unsigned KNKMAX>
01548     void loser_tree<ValTp_, Cmp_, KNKMAX>::insert_segment(Element * to, unsigned_type sz)
01549     {
01550         STXXL_VERBOSE2("loser_tree::insert_segment(" << to << "," << sz << ")");
01551         //std::copy(to,to + sz,std::ostream_iterator<ValTp_>(std::cout, "\n"));
01552 
01553         if (sz > 0)
01554         {
01555             assert(not_sentinel(to[0]));
01556             assert(not_sentinel(to[sz - 1]));
01557             assert(is_sentinel(to[sz]));
01558 
01559             // get a free slot
01560             if (free_segments.empty()) { // tree is too small
01561                 doubleK();
01562             }
01563             assert(!free_segments.empty());
01564             unsigned_type index = free_segments.top();
01565             free_segments.pop();
01566 
01567 
01568             // link new segment
01569             current[index] = segment[index] = to;
01570             segment_size[index] = (sz + 1) * sizeof(value_type);
01571             mem_cons_ += (sz + 1) * sizeof(value_type);
01572             size_ += sz;
01573 
01574             // propagate new information up the tree
01575             Element dummyKey;
01576             unsigned_type dummyIndex;
01577             unsigned_type dummyMask;
01578             update_on_insert((index + k) >> 1, *to, index,
01579                              &dummyKey, &dummyIndex, &dummyMask);
01580         } else {
01581             // immediately deallocate
01582             // this is not only an optimization
01583             // but also needed to keep free segments from
01584             // clogging up the tree
01585             delete[] to;
01586         }
01587     }
01588 
01589 
01590     template <class ValTp_, class Cmp_, unsigned KNKMAX>
01591     loser_tree<ValTp_, Cmp_, KNKMAX>::~loser_tree()
01592     {
01593         STXXL_VERBOSE1("loser_tree::~loser_tree()");
01594         for (unsigned_type i = 0; i < k; ++i)
01595         {
01596             if (segment[i])
01597             {
01598                 STXXL_VERBOSE2("loser_tree::~loser_tree() deleting segment " << i);
01599                 delete[] segment[i];
01600                 mem_cons_ -= segment_size[i];
01601             }
01602         }
01603         // check whether we did not loose memory
01604         assert(mem_cons_ == 0);
01605     }
01606 
01607 // free an empty segment .
01608     template <class ValTp_, class Cmp_, unsigned KNKMAX>
01609     void loser_tree<ValTp_, Cmp_, KNKMAX>::deallocate_segment(unsigned_type slot)
01610     {
01611         // reroute current pointer to some empty sentinel segment
01612         // with a sentinel key
01613         STXXL_VERBOSE2("loser_tree::deallocate_segment() deleting segment " <<
01614                        slot << " address: " << segment[slot] << " size: " << segment_size[slot]);
01615         current[slot] = &sentinel;
01616 
01617         // free memory
01618         delete[] segment[slot];
01619         segment[slot] = NULL;
01620         mem_cons_ -= segment_size[slot];
01621 
01622         // push on the stack of free segment indices
01623         free_segments.push(slot);
01624     }
01625 
01626 
01627 // delete the length smallest elements and write them to "to"
01628 // empty segments are deallocated
01629 // require:
01630 // - there are at least length elements
01631 // - segments are ended by sentinels
01632     template <class ValTp_, class Cmp_, unsigned KNKMAX>
01633     void loser_tree<ValTp_, Cmp_, KNKMAX>::multi_merge(Element * to, unsigned_type length)
01634     {
01635         STXXL_VERBOSE3("loser_tree::multi_merge(to=" << to << ", len=" << length << ") k=" << k);
01636 
01637         if (length == 0)
01638             return;
01639 
01640         assert(k > 0);
01641         assert(length <= size_);
01642 
01643         //This is the place to make statistics about internal multi_merge calls.
01644 
01645         switch (logK) {
01646         case 0:
01647             assert(k == 1);
01648             assert(entry[0].index == 0);
01649             assert(free_segments.empty());
01650             //memcpy(to, current[0], length * sizeof(Element));
01651             std::copy(current[0], current[0] + length, to);
01652             current[0] += length;
01653             entry[0].key = **current;
01654             if (is_segment_empty(0))
01655                 deallocate_segment(0);
01656 
01657             break;
01658         case 1:
01659             assert(k == 2);
01660             merge_iterator(current[0], current[1], to, length, cmp);
01661             rebuildLoserTree();
01662             if (is_segment_empty(0))
01663                 deallocate_segment(0);
01664 
01665             if (is_segment_empty(1))
01666                 deallocate_segment(1);
01667 
01668             break;
01669         case 2:
01670             assert(k == 4);
01671             if (is_segment_empty(3))
01672                 merge3_iterator(current[0], current[1], current[2], to, length, cmp);
01673             else
01674                 merge4_iterator(current[0], current[1], current[2], current[3], to, length, cmp);
01675 
01676             rebuildLoserTree();
01677             if (is_segment_empty(0))
01678                 deallocate_segment(0);
01679 
01680             if (is_segment_empty(1))
01681                 deallocate_segment(1);
01682 
01683             if (is_segment_empty(2))
01684                 deallocate_segment(2);
01685 
01686             if (is_segment_empty(3))
01687                 deallocate_segment(3);
01688 
01689             break;
01690         case  3: multi_merge_f<3>(to, length);
01691             break;
01692         case  4: multi_merge_f<4>(to, length);
01693             break;
01694         case  5: multi_merge_f<5>(to, length);
01695             break;
01696         case  6: multi_merge_f<6>(to, length);
01697             break;
01698         case  7: multi_merge_f<7>(to, length);
01699             break;
01700         case  8: multi_merge_f<8>(to, length);
01701             break;
01702         case  9: multi_merge_f<9>(to, length);
01703             break;
01704         case 10: multi_merge_f<10>(to, length);
01705             break;
01706         default: multi_merge_k(to, length);
01707             break;
01708         }
01709 
01710 
01711         size_ -= length;
01712 
01713         // compact tree if it got considerably smaller
01714         {
01715             const unsigned_type num_segments_used = k - free_segments.size();
01716             const unsigned_type num_segments_trigger = k - (3 * k / 5);
01717             // using k/2 would be worst case inefficient (for large k)
01718             // for k \in {2, 4, 8} the trigger is k/2 which is good
01719             // because we have special mergers for k \in {1, 2, 4}
01720             // there is also a special 3-way-merger, that will be
01721             // triggered if k == 4 && is_segment_empty(3)
01722             STXXL_VERBOSE3("loser_tree  compact? k=" << k << " #used=" << num_segments_used
01723                                                      << " <= #trigger=" << num_segments_trigger << " ==> "
01724                                                      << ((k > 1 && num_segments_used <= num_segments_trigger) ? "yes" : "no ")
01725                                                      << " || "
01726                                                      << ((k == 4 && !free_segments.empty() && !is_segment_empty(3)) ? "yes" : "no ")
01727                                                      << " #free=" << free_segments.size());
01728             if (k > 1 && ((num_segments_used <= num_segments_trigger) ||
01729                           (k == 4 && !free_segments.empty() && !is_segment_empty(3))))
01730             {
01731                 compactTree();
01732             }
01733         }
01734         //std::copy(to,to + length,std::ostream_iterator<ValTp_>(std::cout, "\n"));
01735     }
01736 
01737 
01738 // is this segment empty and does not point to sentinel yet?
01739     template <class ValTp_, class Cmp_, unsigned KNKMAX>
01740     inline bool loser_tree<ValTp_, Cmp_, KNKMAX>::is_segment_empty(unsigned_type slot)
01741     {
01742         return (is_sentinel(*(current[slot])) && (current[slot] != &sentinel));
01743     }
01744 
01745 // multi-merge for arbitrary K
01746     template <class ValTp_, class Cmp_, unsigned KNKMAX>
01747     void loser_tree<ValTp_, Cmp_, KNKMAX>::
01748     multi_merge_k(Element * to, unsigned_type length)
01749     {
01750         Entry * currentPos;
01751         Element currentKey;
01752         unsigned_type currentIndex; // leaf pointed to by current entry
01753         unsigned_type kReg = k;
01754         Element * done = to + length;
01755         unsigned_type winnerIndex = entry[0].index;
01756         Element winnerKey = entry[0].key;
01757         Element * winnerPos;
01758 
01759         while (to != done)
01760         {
01761             winnerPos = current[winnerIndex];
01762 
01763             // write result
01764             *to = winnerKey;
01765 
01766             // advance winner segment
01767             ++winnerPos;
01768             current[winnerIndex] = winnerPos;
01769             winnerKey = *winnerPos;
01770 
01771             // remove winner segment if empty now
01772             if (is_sentinel(winnerKey)) //
01773                 deallocate_segment(winnerIndex);
01774 
01775 
01776             // go up the entry-tree
01777             for (unsigned_type i = (winnerIndex + kReg) >> 1;  i > 0;  i >>= 1) {
01778                 currentPos = entry + i;
01779                 currentKey = currentPos->key;
01780                 if (cmp(winnerKey, currentKey)) {
01781                     currentIndex = currentPos->index;
01782                     currentPos->key = winnerKey;
01783                     currentPos->index = winnerIndex;
01784                     winnerKey = currentKey;
01785                     winnerIndex = currentIndex;
01786                 }
01787             }
01788 
01789             ++to;
01790         }
01791         entry[0].index = winnerIndex;
01792         entry[0].key = winnerKey;
01793     }
01794 }
01795 
01796 /*
01797 
01798    KNBufferSize1 = 32;
01799    KNN = 512; // bandwidth
01800    KNKMAX = 64;  // maximal arity
01801    KNLevels = 4; // overall capacity >= KNN*KNKMAX^KNLevels
01802    LogKNKMAX = 6;  // ceil(log KNK)
01803  */
01804 
01805 // internal memory consumption >= N_*(KMAX_^Levels_) + ext
01806 
01807 template <
01808     class Tp_,
01809     class Cmp_,
01810     unsigned BufferSize1_ = 32,       // equalize procedure call overheads etc.
01811     unsigned N_ = 512,                // bandwidth
01812     unsigned IntKMAX_ = 64,           // maximal arity for internal mergers
01813     unsigned IntLevels_ = 4,
01814     unsigned BlockSize_ = (2 * 1024 * 1024),
01815     unsigned ExtKMAX_ = 64,           // maximal arity for external mergers
01816     unsigned ExtLevels_ = 2,
01817     class AllocStr_ = STXXL_DEFAULT_ALLOC_STRATEGY
01818     >
01819 struct priority_queue_config
01820 {
01821     typedef Tp_ value_type;
01822     typedef Cmp_ comparator_type;
01823     typedef AllocStr_ alloc_strategy_type;
01824     enum
01825     {
01826         BufferSize1 = BufferSize1_,
01827         N = N_,
01828         IntKMAX = IntKMAX_,
01829         IntLevels = IntLevels_,
01830         ExtLevels = ExtLevels_,
01831         BlockSize = BlockSize_,
01832         ExtKMAX = ExtKMAX_,
01833         E = sizeof(Tp_)
01834     };
01835 };
01836 
01837 __STXXL_END_NAMESPACE
01838 
01839 namespace std
01840 {
01841     template <class BlockType_,
01842               class Cmp_,
01843               unsigned Arity_,
01844               class AllocStr_>
01845     void swap(stxxl::priority_queue_local::ext_merger<BlockType_, Cmp_, Arity_, AllocStr_> & a,
01846               stxxl::priority_queue_local::ext_merger<BlockType_, Cmp_, Arity_, AllocStr_> & b)
01847     {
01848         a.swap(b);
01849     }
01850     template <class ValTp_, class Cmp_, unsigned KNKMAX>
01851     void swap(stxxl::priority_queue_local::loser_tree<ValTp_, Cmp_, KNKMAX> & a,
01852               stxxl::priority_queue_local::loser_tree<ValTp_, Cmp_, KNKMAX> & b)
01853     {
01854         a.swap(b);
01855     }
01856 }
01857 
01858 __STXXL_BEGIN_NAMESPACE
01859 
01861 template <class Config_>
01862 class priority_queue : private noncopyable
01863 {
01864 public:
01865     typedef Config_ Config;
01866     enum
01867     {
01868         BufferSize1 = Config::BufferSize1,
01869         N = Config::N,
01870         IntKMAX = Config::IntKMAX,
01871         IntLevels = Config::IntLevels,
01872         ExtLevels = Config::ExtLevels,
01873         Levels = Config::IntLevels + Config::ExtLevels,
01874         BlockSize = Config::BlockSize,
01875         ExtKMAX = Config::ExtKMAX
01876     };
01877 
01879     typedef typename Config::value_type value_type;
01881     typedef typename Config::comparator_type comparator_type;
01882     typedef typename Config::alloc_strategy_type alloc_strategy_type;
01884     typedef stxxl::uint64 size_type;
01885     typedef typed_block<BlockSize, value_type> block_type;
01886 
01887 protected:
01888     typedef priority_queue_local::internal_priority_queue<value_type, std::vector<value_type>, comparator_type>
01889     insert_heap_type;
01890 
01891     typedef priority_queue_local::loser_tree<
01892         value_type,
01893         comparator_type,
01894         IntKMAX> int_merger_type;
01895 
01896     typedef priority_queue_local::ext_merger<
01897         block_type,
01898         comparator_type,
01899         ExtKMAX,
01900         alloc_strategy_type> ext_merger_type;
01901 
01902 
01903     int_merger_type itree[IntLevels];
01904     prefetch_pool<block_type> & p_pool;
01905     write_pool<block_type> & w_pool;
01906     ext_merger_type * etree;
01907 
01908     // one delete buffer for each tree => group buffer
01909     value_type buffer2[Levels][N + 1]; // tree->buffer2->buffer1 (extra space for sentinel)
01910     value_type * minBuffer2[Levels];   // minBuffer2[i] is current start of buffer2[i], end is buffer2[i] + N
01911 
01912     // overall delete buffer
01913     value_type buffer1[BufferSize1 + 1];
01914     value_type * minBuffer1;           // is current start of buffer1, end is buffer1 + BufferSize1
01915 
01916     comparator_type cmp;
01917 
01918     // insert buffer
01919     insert_heap_type insertHeap;
01920 
01921     // how many levels are active
01922     unsigned_type activeLevels;
01923 
01924     // total size not counting insertBuffer and buffer1
01925     size_type size_;
01926     bool deallocate_pools;
01927 
01928     // private member functions
01929     void refillBuffer1();
01930     unsigned_type refillBuffer2(unsigned_type k);
01931 
01932     unsigned_type makeSpaceAvailable(unsigned_type level);
01933     void emptyInsertHeap();
01934 
01935     value_type getSupremum() const { return cmp.min_value(); } //{ return buffer2[0][KNN].key; }
01936     unsigned_type getSize1() const { return (buffer1 + BufferSize1) - minBuffer1; }
01937     unsigned_type getSize2(unsigned_type i) const { return &(buffer2[i][N]) - minBuffer2[i]; }
01938 
01939 public:
01949     priority_queue(prefetch_pool<block_type> & p_pool_, write_pool<block_type> & w_pool_);
01950 
01960     priority_queue(unsigned_type p_pool_mem, unsigned_type w_pool_mem);
01961 
01962     void swap(priority_queue & obj)
01963     {
01964         //swap_1D_arrays(itree,obj.itree,IntLevels); // does not work in g++ 3.4.3 :( bug?
01965         for (unsigned_type i = 0; i < IntLevels; ++i)
01966             std::swap(itree[i], obj.itree[i]);
01967 
01968         // std::swap(p_pool,obj.p_pool);
01969         // std::swap(w_pool,obj.w_pool);
01970         std::swap(etree, obj.etree);
01971         for (unsigned_type i1 = 0; i1 < Levels; ++i1)
01972             for (unsigned_type i2 = 0; i2 < (N + 1); ++i2)
01973                 std::swap(buffer2[i1][i2], obj.buffer2[i1][i2]);
01974 
01975         swap_1D_arrays(minBuffer2, obj.minBuffer2, Levels);
01976         swap_1D_arrays(buffer1, obj.buffer1, BufferSize1 + 1);
01977         std::swap(minBuffer1, obj.minBuffer1);
01978         std::swap(cmp, obj.cmp);
01979         std::swap(insertHeap, obj.insertHeap);
01980         std::swap(activeLevels, obj.activeLevels);
01981         std::swap(size_, obj.size_);
01982         //std::swap(deallocate_pools,obj.deallocate_pools);
01983     }
01984 
01985     virtual ~priority_queue();
01986 
01989     size_type size() const;
01990 
01993     bool empty() const { return (size() == 0); }
01994 
02006     const value_type & top() const;
02007 
02014     void pop();
02015 
02020     void push(const value_type & obj);
02021 
02026     unsigned_type mem_cons() const
02027     {
02028         unsigned_type dynam_alloc_mem(0), i(0);
02029         //dynam_alloc_mem += w_pool.mem_cons();
02030         //dynam_alloc_mem += p_pool.mem_cons();
02031         for ( ; i < IntLevels; ++i)
02032             dynam_alloc_mem += itree[i].mem_cons();
02033 
02034         for (i = 0; i < ExtLevels; ++i)
02035             dynam_alloc_mem += etree[i].mem_cons();
02036 
02037 
02038         return (sizeof(*this) +
02039                 sizeof(ext_merger_type) * ExtLevels +
02040                 dynam_alloc_mem);
02041     }
02042 };
02043 
02044 
02045 template <class Config_>
02046 inline typename priority_queue<Config_>::size_type priority_queue<Config_>::size() const
02047 {
02048     return size_ +
02049            insertHeap.size() - 1 +
02050            ((buffer1 + BufferSize1) - minBuffer1);
02051 }
02052 
02053 
02054 template <class Config_>
02055 inline const typename priority_queue<Config_>::value_type & priority_queue<Config_>::top() const
02056 {
02057     assert(!insertHeap.empty());
02058 
02059     const typename priority_queue<Config_>::value_type & t = insertHeap.top();
02060     if (/*(!insertHeap.empty()) && */ cmp(*minBuffer1, t))
02061         return t;
02062 
02063 
02064     return *minBuffer1;
02065 }
02066 
02067 template <class Config_>
02068 inline void priority_queue<Config_>::pop()
02069 {
02070     //STXXL_VERBOSE1("priority_queue::pop()");
02071     assert(!insertHeap.empty());
02072 
02073     if (/*(!insertHeap.empty()) && */ cmp(*minBuffer1, insertHeap.top()))
02074     {
02075         insertHeap.pop();
02076     }
02077     else
02078     {
02079         assert(minBuffer1 < buffer1 + BufferSize1);
02080         ++minBuffer1;
02081         if (minBuffer1 == buffer1 + BufferSize1)
02082             refillBuffer1();
02083     }
02084 }
02085 
02086 template <class Config_>
02087 inline void priority_queue<Config_>::push(const value_type & obj)
02088 {
02089     //STXXL_VERBOSE3("priority_queue::push("<< obj <<")");
02090     assert(itree->not_sentinel(obj));
02091     if (insertHeap.size() == N + 1)
02092         emptyInsertHeap();
02093 
02094 
02095     assert(!insertHeap.empty());
02096 
02097     insertHeap.push(obj);
02098 }
02099 
02100 
02102 
02103 template <class Config_>
02104 priority_queue<Config_>::priority_queue(prefetch_pool<block_type> & p_pool_, write_pool<block_type> & w_pool_) :
02105     p_pool(p_pool_), w_pool(w_pool_),
02106     insertHeap(N + 2),
02107     activeLevels(0), size_(0),
02108     deallocate_pools(false)
02109 {
02110     STXXL_VERBOSE2("priority_queue::priority_queue()");
02111     assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering
02112     //etree = new ext_merger_type[ExtLevels](p_pool,w_pool);
02113     etree = new ext_merger_type[ExtLevels];
02114     for (unsigned_type j = 0; j < ExtLevels; ++j)
02115         etree[j].set_pools(&p_pool, &w_pool);
02116 
02117     value_type sentinel = cmp.min_value();
02118     buffer1[BufferSize1] = sentinel;      // sentinel
02119     insertHeap.push(sentinel);            // always keep the sentinel
02120     minBuffer1 = buffer1 + BufferSize1;   // empty
02121     for (unsigned_type i = 0;  i < Levels;  i++)
02122     {
02123         buffer2[i][N] = sentinel;         // sentinel
02124         minBuffer2[i] = &(buffer2[i][N]); // empty
02125     }
02126 }
02127 
02128 template <class Config_>
02129 priority_queue<Config_>::priority_queue(unsigned_type p_pool_mem, unsigned_type w_pool_mem) :
02130     p_pool(*(new prefetch_pool<block_type>(p_pool_mem / BlockSize))),
02131     w_pool(*(new write_pool<block_type>(w_pool_mem / BlockSize))),
02132     insertHeap(N + 2),
02133     activeLevels(0), size_(0),
02134     deallocate_pools(true)
02135 {
02136     STXXL_VERBOSE2("priority_queue::priority_queue()");
02137     assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering
02138     etree = new ext_merger_type[ExtLevels];
02139     for (unsigned_type j = 0; j < ExtLevels; ++j)
02140         etree[j].set_pools(&p_pool, &w_pool);
02141 
02142     value_type sentinel = cmp.min_value();
02143     buffer1[BufferSize1] = sentinel;      // sentinel
02144     insertHeap.push(sentinel);            // always keep the sentinel
02145     minBuffer1 = buffer1 + BufferSize1;   // empty
02146     for (unsigned_type i = 0;  i < Levels;  i++)
02147     {
02148         buffer2[i][N] = sentinel;         // sentinel
02149         minBuffer2[i] = &(buffer2[i][N]); // empty
02150     }
02151 }
02152 
02153 template <class Config_>
02154 priority_queue<Config_>::~priority_queue()
02155 {
02156     STXXL_VERBOSE2("priority_queue::~priority_queue()");
02157     if (deallocate_pools)
02158     {
02159         delete &p_pool;
02160         delete &w_pool;
02161     }
02162 
02163     delete[] etree;
02164 }
02165 
02166 //--------------------- Buffer refilling -------------------------------
02167 
02168 // refill buffer2[j] and return number of elements found
02169 template <class Config_>
02170 unsigned_type priority_queue<Config_>::refillBuffer2(unsigned_type j)
02171 {
02172     STXXL_VERBOSE2("priority_queue::refillBuffer2(" << j << ")");
02173 
02174     value_type * oldTarget;
02175     unsigned_type deleteSize;
02176     size_type treeSize = (j < IntLevels) ? itree[j].size() : etree[j - IntLevels].size();  //elements left in segments
02177     unsigned_type bufferSize = buffer2[j] + N - minBuffer2[j];                             //elements left in target buffer
02178     if (treeSize + bufferSize >= size_type(N))
02179     {                                                                                      // buffer will be filled completely
02180         oldTarget = buffer2[j];
02181         deleteSize = N - bufferSize;
02182     }
02183     else
02184     {
02185         oldTarget = buffer2[j] + N - treeSize - bufferSize;
02186         deleteSize = treeSize;
02187     }
02188 
02189     if (deleteSize > 0)
02190     {
02191         // shift  rest to beginning
02192         // possible hack:
02193         // - use memcpy if no overlap
02194         memmove(oldTarget, minBuffer2[j], bufferSize * sizeof(value_type));
02195         minBuffer2[j] = oldTarget;
02196 
02197         // fill remaining space from tree
02198         if (j < IntLevels)
02199             itree[j].multi_merge(oldTarget + bufferSize, deleteSize);
02200 
02201         else
02202         {
02203             //external
02204             etree[j - IntLevels].multi_merge(oldTarget + bufferSize,
02205                                              oldTarget + bufferSize + deleteSize);
02206         }
02207     }
02208 
02209 
02210     //STXXL_MSG(deleteSize + bufferSize);
02211     //std::copy(oldTarget,oldTarget + deleteSize + bufferSize,std::ostream_iterator<value_type>(std::cout, "\n"));
02212 
02213     return deleteSize + bufferSize;
02214 }
02215 
02216 
02217 // move elements from the 2nd level buffers
02218 // to the buffer
02219 template <class Config_>
02220 void priority_queue<Config_>::refillBuffer1()
02221 {
02222     STXXL_VERBOSE2("priority_queue::refillBuffer1()");
02223 
02224     size_type totalSize = 0;
02225     unsigned_type sz;
02226     //activeLevels is <= 4
02227     for (int_type i = activeLevels - 1;  i >= 0;  i--)
02228     {
02229         if ((buffer2[i] + N) - minBuffer2[i] < BufferSize1)
02230         {
02231             sz = refillBuffer2(i);
02232             // max active level dry now?
02233             if (sz == 0 && unsigned_type(i) == activeLevels - 1)
02234                 --activeLevels;
02235 
02236             else
02237                 totalSize += sz;
02238         }
02239         else
02240         {
02241             totalSize += BufferSize1;    // actually only a sufficient lower bound
02242         }
02243     }
02244 
02245     if (totalSize >= BufferSize1)        // buffer can be filled completely
02246     {
02247         minBuffer1 = buffer1;
02248         sz = BufferSize1;                // amount to be copied
02249         size_ -= size_type(BufferSize1); // amount left in buffer2
02250     }
02251     else
02252     {
02253         minBuffer1 = buffer1 + BufferSize1 - totalSize;
02254         sz = totalSize;
02255         assert(size_ == size_type(sz)); // trees and buffer2 get empty
02256         size_ = 0;
02257     }
02258 
02259     // now call simplified refill routines
02260     // which can make the assumption that
02261     // they find all they are asked to find in the buffers
02262     minBuffer1 = buffer1 + BufferSize1 - sz;
02263     STXXL_VERBOSE2("Active levels = " << activeLevels);
02264     switch (activeLevels)
02265     {
02266     case 0: break;
02267     case 1:
02268         std::copy(minBuffer2[0], minBuffer2[0] + sz, minBuffer1);
02269         minBuffer2[0] += sz;
02270         break;
02271     case 2:
02272         priority_queue_local::merge_iterator(
02273             minBuffer2[0],
02274             minBuffer2[1], minBuffer1, sz, cmp);
02275         break;
02276     case 3:
02277         priority_queue_local::merge3_iterator(
02278             minBuffer2[0],
02279             minBuffer2[1],
02280             minBuffer2[2], minBuffer1, sz, cmp);
02281         break;
02282     case 4:
02283         priority_queue_local::merge4_iterator(
02284             minBuffer2[0],
02285             minBuffer2[1],
02286             minBuffer2[2],
02287             minBuffer2[3], minBuffer1, sz, cmp); //side effect free
02288         break;
02289     default:
02290         STXXL_THROW(std::runtime_error, "priority_queue<...>::refillBuffer1()",
02291                     "Overflow! The number of buffers on 2nd level in stxxl::priority_queue is currently limited to 4");
02292     }
02293 
02294     //std::copy(minBuffer1,minBuffer1 + sz,std::ostream_iterator<value_type>(std::cout, "\n"));
02295 }
02296 
02297 //--------------------------------------------------------------------
02298 
02299 // check if space is available on level k and
02300 // empty this level if necessary leading to a recursive call.
02301 // return the level where space was finally available
02302 template <class Config_>
02303 unsigned_type priority_queue<Config_>::makeSpaceAvailable(unsigned_type level)
02304 {
02305     STXXL_VERBOSE2("priority_queue::makeSpaceAvailable(" << level << ")");
02306     unsigned_type finalLevel;
02307     assert(level < Levels);
02308     assert(level <= activeLevels);
02309 
02310     if (level == activeLevels)
02311         activeLevels++;
02312 
02313 
02314     const bool spaceIsAvailable_ =
02315         (level < IntLevels) ? itree[level].spaceIsAvailable()
02316         : ((level == Levels - 1) ? true : (etree[level - IntLevels].spaceIsAvailable()));
02317 
02318     if (spaceIsAvailable_)
02319     {
02320         finalLevel = level;
02321     }
02322     else
02323     {
02324         finalLevel = makeSpaceAvailable(level + 1);
02325 
02326         if (level < IntLevels - 1)                             // from internal to internal tree
02327         {
02328             unsigned_type segmentSize = itree[level].size();
02329             value_type * newSegment = new value_type[segmentSize + 1];
02330             itree[level].multi_merge(newSegment, segmentSize); // empty this level
02331 
02332             newSegment[segmentSize] = buffer1[BufferSize1];    // sentinel
02333             // for queues where size << #inserts
02334             // it might make sense to stay in this level if
02335             // segmentSize < alpha * KNN * k^level for some alpha < 1
02336             itree[level + 1].insert_segment(newSegment, segmentSize);
02337         }
02338         else
02339         {
02340             if (level == IntLevels - 1) // from internal to external tree
02341             {
02342                 const unsigned_type segmentSize = itree[IntLevels - 1].size();
02343                 STXXL_VERBOSE1("Inserting segment into first level external: " << level << " " << segmentSize);
02344                 etree[0].insert_segment(itree[IntLevels - 1], segmentSize);
02345             }
02346             else // from external to external tree
02347             {
02348                 const size_type segmentSize = etree[level - IntLevels].size();
02349                 STXXL_VERBOSE1("Inserting segment into second level external: " << level << " " << segmentSize);
02350                 etree[level - IntLevels + 1].insert_segment(etree[level - IntLevels], segmentSize);
02351             }
02352         }
02353     }
02354     return finalLevel;
02355 }
02356 
02357 
02358 // empty the insert heap into the main data structure
02359 template <class Config_>
02360 void priority_queue<Config_>::emptyInsertHeap()
02361 {
02362     STXXL_VERBOSE2("priority_queue::emptyInsertHeap()");
02363     assert(insertHeap.size() == (N + 1));
02364 
02365     const value_type sup = getSupremum();
02366 
02367     // build new segment
02368     value_type * newSegment = new value_type[N + 1];
02369     value_type * newPos = newSegment;
02370 
02371     // put the new data there for now
02372     //insertHeap.sortTo(newSegment);
02373     value_type * SortTo = newSegment;
02374 
02375     insertHeap.sort_to(SortTo);
02376 
02377     SortTo = newSegment + N;
02378     insertHeap.clear();
02379     insertHeap.push(*SortTo);
02380 
02381     assert(insertHeap.size() == 1);
02382 
02383     newSegment[N] = sup; // sentinel
02384 
02385     // copy the buffer1 and buffer2[0] to temporary storage
02386     // (the temporary can be eliminated using some dirty tricks)
02387     const unsigned_type tempSize = N + BufferSize1;
02388     value_type temp[tempSize + 1];
02389     unsigned_type sz1 = getSize1();
02390     unsigned_type sz2 = getSize2(0);
02391     value_type * pos = temp + tempSize - sz1 - sz2;
02392     std::copy(minBuffer1, minBuffer1 + sz1, pos);
02393     std::copy(minBuffer2[0], minBuffer2[0] + sz2, pos + sz1);
02394     temp[tempSize] = sup; // sentinel
02395 
02396     // refill buffer1
02397     // (using more complicated code it could be made somewhat fuller
02398     // in certain circumstances)
02399     priority_queue_local::merge_iterator(pos, newPos, minBuffer1, sz1, cmp);
02400 
02401     // refill buffer2[0]
02402     // (as above we might want to take the opportunity
02403     // to make buffer2[0] fuller)
02404     priority_queue_local::merge_iterator(pos, newPos, minBuffer2[0], sz2, cmp);
02405 
02406     // merge the rest to the new segment
02407     // note that merge exactly trips into the footsteps
02408     // of itself
02409     priority_queue_local::merge_iterator(pos, newPos, newSegment, N, cmp);
02410 
02411     // and insert it
02412     unsigned_type freeLevel = makeSpaceAvailable(0);
02413     assert(freeLevel == 0 || itree[0].size() == 0);
02414     itree[0].insert_segment(newSegment, N);
02415 
02416     // get rid of invalid level 2 buffers
02417     // by inserting them into tree 0 (which is almost empty in this case)
02418     if (freeLevel > 0)
02419     {
02420         for (int_type i = freeLevel;  i >= 0;  i--)
02421         {
02422             // reverse order not needed
02423             // but would allow immediate refill
02424             newSegment = new value_type[getSize2(i) + 1]; // with sentinel
02425             std::copy(minBuffer2[i], minBuffer2[i] + getSize2(i) + 1, newSegment);
02426             itree[0].insert_segment(newSegment, getSize2(i));
02427             minBuffer2[i] = buffer2[i] + N;               // empty
02428         }
02429     }
02430 
02431     // update size
02432     size_ += size_type(N);
02433 
02434     // special case if the tree was empty before
02435     if (minBuffer1 == buffer1 + BufferSize1)
02436         refillBuffer1();
02437 }
02438 
02439 namespace priority_queue_local
02440 {
02441     struct Parameters_for_priority_queue_not_found_Increase_IntM
02442     {
02443         enum { fits = false };
02444         typedef Parameters_for_priority_queue_not_found_Increase_IntM result;
02445     };
02446 
02447     struct dummy
02448     {
02449         enum { fits = false };
02450         typedef dummy result;
02451     };
02452 
02453     template <unsigned_type E_, unsigned_type IntM_, unsigned_type MaxS_, unsigned_type B_, unsigned_type m_, bool stop = false>
02454     struct find_B_m
02455     {
02456         typedef find_B_m<E_, IntM_, MaxS_, B_, m_, stop> Self;
02457         enum {
02458             k = IntM_ / B_, // number of blocks that fit into M
02459             E = E_,         // element size
02460             IntM = IntM_,
02461             B = B_,         // block size
02462             m = m_,         // number of blocks fitting into buffers
02463             c = k - m_,
02464             // memory occ. by block must be at least 10 times larger than size of ext sequence
02465             // && satisfy memory req && if we have two ext mergers their degree must be at least 64=m/2
02466             fits = c > 10 && ((k - m) * (m) * (m * B / (E * 4 * 1024))) >= MaxS_ && (MaxS_ < ((k - m) * m / (2 * E)) * 1024 || m >= 128),
02467             step = 1
02468         };
02469 
02470         typedef typename find_B_m<E, IntM, MaxS_, B, m + step, fits || (m >= k - step)>::result candidate1;
02471         typedef typename find_B_m<E, IntM, MaxS_, B / 2, 1, fits || candidate1::fits>::result candidate2;
02472         typedef typename IF<fits, Self, typename IF<candidate1::fits, candidate1, candidate2>::result>::result result;
02473     };
02474 
02475     // specialization for the case when no valid parameters are found
02476     template <unsigned_type E_, unsigned_type IntM_, unsigned_type MaxS_, bool stop>
02477     struct find_B_m<E_, IntM_, MaxS_, 2048, 1, stop>
02478     {
02479         enum { fits = false };
02480         typedef Parameters_for_priority_queue_not_found_Increase_IntM result;
02481     };
02482 
02483     // to speedup search
02484     template <unsigned_type E_, unsigned_type IntM_, unsigned_type MaxS_, unsigned_type B_, unsigned_type m_>
02485     struct find_B_m<E_, IntM_, MaxS_, B_, m_, true>
02486     {
02487         enum { fits = false };
02488         typedef dummy result;
02489     };
02490 
02491     // E_ size of element in bytes
02492     template <unsigned_type E_, unsigned_type IntM_, unsigned_type MaxS_>
02493     struct find_settings
02494     {
02495         // start from block size (8*1024*1024) bytes
02496         typedef typename find_B_m<E_, IntM_, MaxS_, (8 * 1024 * 1024), 1>::result result;
02497     };
02498 
02499     struct Parameters_not_found_Try_to_change_the_Tune_parameter
02500     {
02501         typedef Parameters_not_found_Try_to_change_the_Tune_parameter result;
02502     };
02503 
02504 
02505     template <unsigned_type AI_, unsigned_type X_, unsigned_type CriticalSize_>
02506     struct compute_N
02507     {
02508         typedef compute_N<AI_, X_, CriticalSize_> Self;
02509         enum
02510         {
02511             X = X_,
02512             AI = AI_,
02513             N = X / (AI * AI) // two stage internal
02514         };
02515         typedef typename IF<(N >= CriticalSize_), Self, typename compute_N<AI / 2, X, CriticalSize_>::result>::result result;
02516     };
02517 
02518     template <unsigned_type X_, unsigned_type CriticalSize_>
02519     struct compute_N<1, X_, CriticalSize_>
02520     {
02521         typedef Parameters_not_found_Try_to_change_the_Tune_parameter result;
02522     };
02523 }
02524 
02526 
02529 
02531 
02595 template <class Tp_, class Cmp_, unsigned_type IntM_, unsigned MaxS_, unsigned Tune_ = 6>
02596 class PRIORITY_QUEUE_GENERATOR
02597 {
02598 public:
02599     // actual calculation of B, m, k and E
02600     typedef typename priority_queue_local::find_settings<sizeof(Tp_), IntM_, MaxS_>::result settings;
02601     enum {
02602         B = settings::B,
02603         m = settings::m,
02604         X = B * (settings::k - m) / settings::E,  // interpretation of result
02605         Buffer1Size = 32                          // fixed
02606     };
02607     // derivation of N, AI, AE
02608     typedef typename priority_queue_local::compute_N<(1 << Tune_), X, 4 * Buffer1Size>::result ComputeN;
02609     enum
02610     {
02611         N = ComputeN::N,
02612         AI = ComputeN::AI,
02613         AE = (m / 2 < 2) ? 2 : (m / 2)            // at least 2
02614     };
02615     enum {
02616         // Estimation of maximum internal memory consumption (in bytes)
02617         EConsumption = X * settings::E + settings::B * AE + ((MaxS_ / X) / AE) * settings::B * 1024
02618     };
02619     /*
02620         unsigned BufferSize1_ = 32, // equalize procedure call overheads etc.
02621         unsigned N_ = 512,          // bandwidth
02622         unsigned IntKMAX_ = 64,     // maximal arity for internal mergers
02623         unsigned IntLevels_ = 4,
02624         unsigned BlockSize_ = (2*1024*1024),
02625         unsigned ExtKMAX_ = 64,     // maximal arity for external mergers
02626         unsigned ExtLevels_ = 2,
02627      */
02628     typedef priority_queue<priority_queue_config<Tp_, Cmp_, Buffer1Size, N, AI, 2, B, AE, 2> > result;
02629 };
02630 
02632 
02633 __STXXL_END_NAMESPACE
02634 
02635 
02636 namespace std
02637 {
02638     template <class Config_>
02639     void swap(stxxl::priority_queue<Config_> & a,
02640               stxxl::priority_queue<Config_> & b)
02641     {
02642         a.swap(b);
02643     }
02644 }
02645 
02646 #endif // !STXXL_PRIORITY_QUEUE_HEADER
02647 // vim: et:ts=4:sw=4