18 #ifndef TLX_SORT_PARALLEL_MERGESORT_HEADER
19 #define TLX_SORT_PARALLEL_MERGESORT_HEADER
40 namespace parallel_mergesort_detail {
43 template <
typename DiffType>
56 template <
typename RandomAccessIterator>
59 typename std::iterator_traits<RandomAccessIterator>::value_type;
61 typename std::iterator_traits<RandomAccessIterator>::difference_type;
64 RandomAccessIterator
source;
92 template <
typename RandomAccessIterator,
typename DiffType>
103 static_cast<size_t>(num_samples + 1), es.
begin());
105 for (
DiffType i = 0; i < num_samples; i++) {
119 template <
bool Stable,
typename RandomAccessIterator,
typename Comparator>
127 typename std::iterator_traits<RandomAccessIterator>::value_type;
129 typename std::iterator_traits<RandomAccessIterator>::difference_type;
132 DiffType length_local = sd->starts[iam + 1] - sd->starts[iam];
134 using SortingPlacesIterator = ValueType *;
137 sd->temporary[iam] =
static_cast<ValueType*
>(
138 ::operator
new (
sizeof(ValueType) * (length_local + 1)));
141 std::uninitialized_copy(sd->source + sd->starts[iam],
142 sd->source + sd->starts[iam] + length_local,
147 std::stable_sort(sd->temporary[iam],
148 sd->temporary[iam] + length_local, comp);
150 std::sort(sd->temporary[iam],
151 sd->temporary[iam] + length_local, comp);
158 DiffType num_samples;
163 std::sort(sd->samples.begin(), sd->samples.end(), comp);
166 for (
size_t s = 0; s < num_threads; s++)
169 if (num_samples * iam > 0)
170 sd->pieces[iam][s].begin =
171 std::lower_bound(sd->temporary[s],
172 sd->temporary[s] + sd->starts[s + 1] - sd->starts[s],
173 sd->samples[num_samples * iam],
178 sd->pieces[iam][s].begin = 0;
180 if ((num_samples * (iam + 1)) < (num_samples * num_threads))
181 sd->pieces[iam][s].end =
182 std::lower_bound(sd->temporary[s],
183 sd->temporary[s] + sd->starts[s + 1] - sd->starts[s],
184 sd->samples[num_samples * (iam + 1)],
189 sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
197 SortingPlacesIterator> > seqs(num_threads);
199 for (
size_t s = 0; s < num_threads; s++)
200 seqs[s] = std::make_pair(
202 sd->temporary[s] + sd->starts[s + 1] - sd->starts[s]);
204 simple_vector<SortingPlacesIterator> offsets(num_threads);
207 if (iam < num_threads - 1)
209 sd->starts[iam + 1], offsets.begin(), comp);
211 for (
size_t seq = 0; seq < num_threads; seq++)
214 if (iam < (num_threads - 1))
215 sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
218 sd->pieces[iam][seq].end = sd->starts[seq + 1] - sd->starts[seq];
223 for (
size_t seq = 0; seq < num_threads; seq++)
227 sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
230 sd->pieces[iam][seq].begin = 0;
235 DiffType offset = 0, length_am = 0;
236 for (
size_t s = 0; s < num_threads; s++)
238 length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
239 offset += sd->pieces[iam][s].begin;
245 SortingPlacesIterator> > seqs(num_threads);
247 for (
size_t s = 0; s < num_threads; s++)
249 seqs[s] = std::make_pair(
250 sd->temporary[s] + sd->pieces[iam][s].begin,
251 sd->temporary[s] + sd->pieces[iam][s].end);
255 seqs.begin(), seqs.end(),
256 sd->source + offset, length_am, comp);
260 operator delete (sd->temporary[iam]);
278 template <
bool Stable,
279 typename RandomAccessIterator,
typename Comparator>
281 RandomAccessIterator begin,
282 RandomAccessIterator end,
284 size_t num_threads = std::thread::hardware_concurrency(),
287 using namespace parallel_mergesort_detail;
290 typename std::iterator_traits<RandomAccessIterator>::difference_type;
292 DiffType n = end - begin;
298 if (num_threads >
static_cast<size_t>(n))
299 num_threads =
static_cast<size_t>(n);
301 PMWMSSortingData<RandomAccessIterator> sd(num_threads);
309 for (
size_t s = 0; s < num_threads; s++)
310 sd.pieces[s].resize(num_threads);
312 DiffType* starts = sd.starts.data();
314 DiffType chunk_length = n / num_threads,
split = n % num_threads, start = 0;
315 for (
size_t i = 0; i < num_threads; i++)
318 start += (i < static_cast<size_t>(
split))
319 ? (chunk_length + 1) : chunk_length;
321 starts[num_threads] = start;
325 ThreadBarrierMutex barrier(num_threads);
328 #pragma omp parallel num_threads(num_threads)
330 size_t iam = omp_get_thread_num();
331 parallel_sort_mwms_pu<Stable>(
332 &sd, iam, num_threads, barrier, comp, mwmsa);
335 simple_vector<std::thread> threads(num_threads);
336 for (
size_t iam = 0; iam < num_threads; ++iam) {
337 threads[iam] = std::thread(
339 parallel_sort_mwms_pu<Stable>(
340 &sd, iam, num_threads, barrier, comp, mwmsa);
343 for (
size_t i = 0; i < num_threads; i++) {
346 #endif // defined(_OPENMP)
358 template <
typename RandomAccessIterator,
359 typename Comparator = std::less<
360 typename std::iterator_traits<RandomAccessIterator>::value_type> >
362 RandomAccessIterator begin,
363 RandomAccessIterator end,
364 Comparator comp = Comparator(),
365 size_t num_threads = std::thread::hardware_concurrency(),
369 begin, end, comp, num_threads, mwmsa);
381 template <
typename RandomAccessIterator,
382 typename Comparator = std::less<
383 typename std::iterator_traits<RandomAccessIterator>::value_type> >
385 RandomAccessIterator begin,
386 RandomAccessIterator end,
387 Comparator comp = Comparator(),
388 size_t num_threads = std::thread::hardware_concurrency(),
392 begin, end, comp, num_threads, mwmsa);
400 #endif // !TLX_SORT_PARALLEL_MERGESORT_HEADER