00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
00033 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
00034
00035 #include <vector>
00036
00037 #include <parallel/basic_iterator.h>
00038 #include <bits/stl_algo.h>
00039 #include <parallel/parallel.h>
00040 #include <parallel/multiway_merge.h>
00041
00042 namespace __gnu_parallel
00043 {
00044
00045 template<typename _DifferenceTp>
00046 struct _Piece
00047 {
00048 typedef _DifferenceTp _DifferenceType;
00049
00050
00051 _DifferenceType _M_begin;
00052
00053
00054 _DifferenceType _M_end;
00055 };
00056
00057
00058
00059
00060 template<typename _RAIter>
00061 struct _PMWMSSortingData
00062 {
00063 typedef std::iterator_traits<_RAIter> _TraitsType;
00064 typedef typename _TraitsType::value_type _ValueType;
00065 typedef typename _TraitsType::difference_type _DifferenceType;
00066
00067
00068 _ThreadIndex _M_num_threads;
00069
00070
00071 _RAIter _M_source;
00072
00073
00074 _DifferenceType* _M_starts;
00075
00076
00077 _ValueType** _M_temporary;
00078
00079
00080 _ValueType* _M_samples;
00081
00082
00083 _DifferenceType* _M_offsets;
00084
00085
00086 std::vector<_Piece<_DifferenceType> >* _M_pieces;
00087 };
00088
00089
00090
00091
00092
00093
00094
00095 template<typename _RAIter, typename _DifferenceTp>
00096 void
00097 __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
00098 _DifferenceTp __num_samples)
00099 {
00100 typedef std::iterator_traits<_RAIter> _TraitsType;
00101 typedef typename _TraitsType::value_type _ValueType;
00102 typedef _DifferenceTp _DifferenceType;
00103
00104 _ThreadIndex __iam = omp_get_thread_num();
00105
00106 _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
00107
00108 equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
00109 __num_samples + 1, __es);
00110
00111 for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
00112 ::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
00113 _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
00114 + __es[__i + 1]]);
00115
00116 delete[] __es;
00117 }
00118
00119
00120 template<bool __exact, typename _RAIter,
00121 typename _Compare, typename _SortingPlacesIterator>
00122 struct _SplitConsistently
00123 { };
00124
00125
00126 template<typename _RAIter, typename _Compare,
00127 typename _SortingPlacesIterator>
00128 struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
00129 {
00130 void
00131 operator()(const _ThreadIndex __iam,
00132 _PMWMSSortingData<_RAIter>* __sd,
00133 _Compare& __comp,
00134 const typename
00135 std::iterator_traits<_RAIter>::difference_type
00136 __num_samples) const
00137 {
00138 # pragma omp barrier
00139
00140 std::vector<std::pair<_SortingPlacesIterator,
00141 _SortingPlacesIterator> >
00142 __seqs(__sd->_M_num_threads);
00143 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
00144 __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
00145 __sd->_M_temporary[__s]
00146 + (__sd->_M_starts[__s + 1]
00147 - __sd->_M_starts[__s]));
00148
00149 std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
00150
00151
00152 if (__iam < __sd->_M_num_threads - 1)
00153 multiseq_partition(__seqs.begin(), __seqs.end(),
00154 __sd->_M_starts[__iam + 1], __offsets.begin(),
00155 __comp);
00156
00157 for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
00158 {
00159
00160 if (__iam < (__sd->_M_num_threads - 1))
00161 __sd->_M_pieces[__iam][__seq]._M_end
00162 = __offsets[__seq] - __seqs[__seq].first;
00163 else
00164
00165 __sd->_M_pieces[__iam][__seq]._M_end =
00166 __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
00167 }
00168
00169 # pragma omp barrier
00170
00171 for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
00172 {
00173
00174 if (__iam > 0)
00175 __sd->_M_pieces[__iam][__seq]._M_begin =
00176 __sd->_M_pieces[__iam - 1][__seq]._M_end;
00177 else
00178
00179 __sd->_M_pieces[__iam][__seq]._M_begin = 0;
00180 }
00181 }
00182 };
00183
00184
00185 template<typename _RAIter, typename _Compare,
00186 typename _SortingPlacesIterator>
00187 struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
00188 {
00189 void
00190 operator()(const _ThreadIndex __iam,
00191 _PMWMSSortingData<_RAIter>* __sd,
00192 _Compare& __comp,
00193 const typename
00194 std::iterator_traits<_RAIter>::difference_type
00195 __num_samples) const
00196 {
00197 typedef std::iterator_traits<_RAIter> _TraitsType;
00198 typedef typename _TraitsType::value_type _ValueType;
00199 typedef typename _TraitsType::difference_type _DifferenceType;
00200
00201 __determine_samples(__sd, __num_samples);
00202
00203 # pragma omp barrier
00204
00205 # pragma omp single
00206 __gnu_sequential::sort(__sd->_M_samples,
00207 __sd->_M_samples
00208 + (__num_samples * __sd->_M_num_threads),
00209 __comp);
00210
00211 # pragma omp barrier
00212
00213 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
00214 {
00215
00216 if (__num_samples * __iam > 0)
00217 __sd->_M_pieces[__iam][__s]._M_begin =
00218 std::lower_bound(__sd->_M_temporary[__s],
00219 __sd->_M_temporary[__s]
00220 + (__sd->_M_starts[__s + 1]
00221 - __sd->_M_starts[__s]),
00222 __sd->_M_samples[__num_samples * __iam],
00223 __comp)
00224 - __sd->_M_temporary[__s];
00225 else
00226
00227 __sd->_M_pieces[__iam][__s]._M_begin = 0;
00228
00229 if ((__num_samples * (__iam + 1)) <
00230 (__num_samples * __sd->_M_num_threads))
00231 __sd->_M_pieces[__iam][__s]._M_end =
00232 std::lower_bound(__sd->_M_temporary[__s],
00233 __sd->_M_temporary[__s]
00234 + (__sd->_M_starts[__s + 1]
00235 - __sd->_M_starts[__s]),
00236 __sd->_M_samples[__num_samples * (__iam + 1)],
00237 __comp)
00238 - __sd->_M_temporary[__s];
00239 else
00240
00241 __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
00242 - __sd->_M_starts[__s]);
00243 }
00244 }
00245 };
00246
00247 template<bool __stable, typename _RAIter, typename _Compare>
00248 struct __possibly_stable_sort
00249 { };
00250
00251 template<typename _RAIter, typename _Compare>
00252 struct __possibly_stable_sort<true, _RAIter, _Compare>
00253 {
00254 void operator()(const _RAIter& __begin,
00255 const _RAIter& __end, _Compare& __comp) const
00256 { __gnu_sequential::stable_sort(__begin, __end, __comp); }
00257 };
00258
00259 template<typename _RAIter, typename _Compare>
00260 struct __possibly_stable_sort<false, _RAIter, _Compare>
00261 {
00262 void operator()(const _RAIter __begin,
00263 const _RAIter __end, _Compare& __comp) const
00264 { __gnu_sequential::sort(__begin, __end, __comp); }
00265 };
00266
00267 template<bool __stable, typename Seq_RAIter,
00268 typename _RAIter, typename _Compare,
00269 typename DiffType>
00270 struct __possibly_stable_multiway_merge
00271 { };
00272
00273 template<typename Seq_RAIter, typename _RAIter,
00274 typename _Compare, typename _DiffType>
00275 struct __possibly_stable_multiway_merge<true, Seq_RAIter,
00276 _RAIter, _Compare, _DiffType>
00277 {
00278 void operator()(const Seq_RAIter& __seqs_begin,
00279 const Seq_RAIter& __seqs_end,
00280 const _RAIter& __target,
00281 _Compare& __comp,
00282 _DiffType __length_am) const
00283 { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
00284 __length_am, __comp, sequential_tag()); }
00285 };
00286
00287 template<typename Seq_RAIter, typename _RAIter,
00288 typename _Compare, typename _DiffType>
00289 struct __possibly_stable_multiway_merge<false, Seq_RAIter,
00290 _RAIter, _Compare, _DiffType>
00291 {
00292 void operator()(const Seq_RAIter& __seqs_begin,
00293 const Seq_RAIter& __seqs_end,
00294 const _RAIter& __target,
00295 _Compare& __comp,
00296 _DiffType __length_am) const
00297 { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
00298 __comp, sequential_tag()); }
00299 };
00300
00301
00302
00303
00304
00305 template<bool __stable, bool __exact, typename _RAIter,
00306 typename _Compare>
00307 void
00308 parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
00309 _Compare& __comp)
00310 {
00311 typedef std::iterator_traits<_RAIter> _TraitsType;
00312 typedef typename _TraitsType::value_type _ValueType;
00313 typedef typename _TraitsType::difference_type _DifferenceType;
00314
00315 _ThreadIndex __iam = omp_get_thread_num();
00316
00317
00318 _DifferenceType __length_local =
00319 __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
00320
00321
00322
00323 typedef _ValueType* _SortingPlacesIterator;
00324
00325 __sd->_M_temporary[__iam] =
00326 static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
00327 * (__length_local + 1)));
00328
00329
00330 std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
00331 __sd->_M_source + __sd->_M_starts[__iam]
00332 + __length_local,
00333 __sd->_M_temporary[__iam]);
00334
00335 __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
00336 (__sd->_M_temporary[__iam],
00337 __sd->_M_temporary[__iam] + __length_local,
00338 __comp);
00339
00340
00341
00342
00343
00344
00345 _DifferenceType __num_samples =
00346 _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
00347 _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
00348 (__iam, __sd, __comp, __num_samples);
00349
00350
00351 _DifferenceType __offset = 0, __length_am = 0;
00352 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
00353 {
00354 __length_am += (__sd->_M_pieces[__iam][__s]._M_end
00355 - __sd->_M_pieces[__iam][__s]._M_begin);
00356 __offset += __sd->_M_pieces[__iam][__s]._M_begin;
00357 }
00358
00359 typedef std::vector<
00360 std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
00361 _SeqVector;
00362 _SeqVector __seqs(__sd->_M_num_threads);
00363
00364 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
00365 {
00366 __seqs[__s] =
00367 std::make_pair(__sd->_M_temporary[__s]
00368 + __sd->_M_pieces[__iam][__s]._M_begin,
00369 __sd->_M_temporary[__s]
00370 + __sd->_M_pieces[__iam][__s]._M_end);
00371 }
00372
00373 __possibly_stable_multiway_merge<
00374 __stable, typename _SeqVector::iterator,
00375 _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
00376 __sd->_M_source + __offset, __comp,
00377 __length_am);
00378
00379 # pragma omp barrier
00380
00381 ::operator delete(__sd->_M_temporary[__iam]);
00382 }
00383
00384
00385
00386
00387
00388
00389
00390
00391 template<bool __stable, bool __exact, typename _RAIter,
00392 typename _Compare>
00393 void
00394 parallel_sort_mwms(_RAIter __begin, _RAIter __end,
00395 _Compare __comp,
00396 _ThreadIndex __num_threads)
00397 {
00398 _GLIBCXX_CALL(__end - __begin)
00399
00400 typedef std::iterator_traits<_RAIter> _TraitsType;
00401 typedef typename _TraitsType::value_type _ValueType;
00402 typedef typename _TraitsType::difference_type _DifferenceType;
00403
00404 _DifferenceType __n = __end - __begin;
00405
00406 if (__n <= 1)
00407 return;
00408
00409
00410 if (__num_threads > __n)
00411 __num_threads = static_cast<_ThreadIndex>(__n);
00412
00413
00414 _PMWMSSortingData<_RAIter> __sd;
00415 _DifferenceType* __starts;
00416
00417 # pragma omp parallel num_threads(__num_threads)
00418 {
00419 __num_threads = omp_get_num_threads();
00420
00421 # pragma omp single
00422 {
00423 __sd._M_num_threads = __num_threads;
00424 __sd._M_source = __begin;
00425
00426 __sd._M_temporary = new _ValueType*[__num_threads];
00427
00428 if (!__exact)
00429 {
00430 _DifferenceType __size =
00431 (_Settings::get().sort_mwms_oversampling * __num_threads - 1)
00432 * __num_threads;
00433 __sd._M_samples = static_cast<_ValueType*>
00434 (::operator new(__size * sizeof(_ValueType)));
00435 }
00436 else
00437 __sd._M_samples = NULL;
00438
00439 __sd._M_offsets = new _DifferenceType[__num_threads - 1];
00440 __sd._M_pieces
00441 = new std::vector<_Piece<_DifferenceType> >[__num_threads];
00442 for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
00443 __sd._M_pieces[__s].resize(__num_threads);
00444 __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
00445
00446 _DifferenceType __chunk_length = __n / __num_threads;
00447 _DifferenceType __split = __n % __num_threads;
00448 _DifferenceType __pos = 0;
00449 for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
00450 {
00451 __starts[__i] = __pos;
00452 __pos += ((__i < __split)
00453 ? (__chunk_length + 1) : __chunk_length);
00454 }
00455 __starts[__num_threads] = __pos;
00456 }
00457
00458
00459 parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
00460 }
00461
00462 delete[] __starts;
00463 delete[] __sd._M_temporary;
00464
00465 if (!__exact)
00466 ::operator delete(__sd._M_samples);
00467
00468 delete[] __sd._M_offsets;
00469 delete[] __sd._M_pieces;
00470 }
00471
00472 }
00473
00474 #endif