Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
tbb/parallel_for.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16 
17 
18 
19 */
20 
21 #ifndef __TBB_parallel_for_H
22 #define __TBB_parallel_for_H
23 
24 #include <new>
25 #include "task.h"
26 #include "partitioner.h"
27 #include "blocked_range.h"
28 #include "tbb_exception.h"
30 
31 namespace tbb {
32 
33 namespace interface9 {
35 namespace internal {
36 
// Forward declaration: start_for::offer_work below calls this before the inline definition appears.
38  void* allocate_sibling(task* start_for_task, size_t bytes);
39 
41 
// Task type used in parallel_for: stores a range, a copy of the body and a
// partition object, and exposes run_body/offer_work as callbacks for the partitioner.
// NOTE(review): this listing omits some source lines (gaps in the embedded
// line numbers), so a few member headers and statements are not visible here.
42  template<typename Range, typename Body, typename Partitioner>
43  class start_for: public task {
    // Range handled by this task.
44  Range my_range;
    // Copy of the body held by value; invoked from run_body().
45  const Body my_body;
    // Partition object of the type selected by Partitioner.
46  typename Partitioner::task_partition_type my_partition;
48 
    // NOTE(review): the header of this member is missing from the listing;
    // presumably note_affinity(affinity_id id), forwarding the id to the partition - confirm.
51  my_partition.note_affinity( id );
52  }
53 
54  public:
    // Constructor for root task: copies range and body and builds the partition from the partitioner.
56  start_for( const Range& range, const Body& body, Partitioner& partitioner ) :
57  my_range(range),
58  my_body(body),
59  my_partition(partitioner)
60  {
    // The last argument is NULL here; the other constructors pass the parent task instead.
61  tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, NULL);
62  }
64 
    // Splitting constructor used to generate children: range and partition are
    // split off the parent using split_obj, the body is copied from the parent.
65  start_for( start_for& parent_, typename Partitioner::split_type& split_obj) :
66  my_range(parent_.my_range, split_obj),
67  my_body(parent_.my_body),
68  my_partition(parent_.my_partition, split_obj)
69  {
70  my_partition.set_affinity(*this);
71  tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
72  }
74 
    // Construct right child from the given range as response to the demand.
    // The range r is taken as is; only the partition is split off the parent,
    // and the new partition is then aligned to depth d.
75  start_for( start_for& parent_, const Range& r, depth_t d ) :
76  my_range(r),
77  my_body(parent_.my_body),
78  my_partition(parent_.my_partition, split())
79  {
80  my_partition.set_affinity(*this);
81  my_partition.align_depth( d );
82  tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
83  }
    // Entry point without a caller-supplied context: allocates the root task
    // when the range is non-empty; an empty range is a no-op.
84  static void run( const Range& range, const Body& body, Partitioner& partitioner ) {
85  if( !range.empty() ) {
86 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
87  start_for& a = *new(task::allocate_root()) start_for(range,body,partitioner);
88 #else
89  // A bound context prevents exceptions thrown by the body from affecting nesting or sibling algorithms,
90  // and allows users to handle exceptions safely by wrapping parallel_for in a try-block.
91  task_group_context context(PARALLEL_FOR);
92  start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
93 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
94  // REGION BEGIN
95  fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
    // NOTE(review): one listing line is missing here; presumably the call that
    // spawns root task a and waits for it - confirm against the header.
97  fgt_end_algorithm( (void*)&context );
98  // REGION END
99  }
100  }
101 #if __TBB_TASK_GROUP_CONTEXT
    // Entry point with a caller-supplied context: the root task is allocated in
    // that context; an empty range is a no-op.
102  static void run( const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context ) {
103  if( !range.empty() ) {
104  start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
105  // REGION BEGIN
106  fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
    // NOTE(review): one listing line is missing here as well; presumably the
    // spawn-and-wait call for root task a - confirm.
108  fgt_end_algorithm( (void*)&context );
109  // REGION END
110  }
111  }
112 #endif /* __TBB_TASK_GROUP_CONTEXT */
    // Run body for range, serves as callback for partitioner.
    // The body call is bracketed by the fgt_alg_begin_body / fgt_alg_end_body hooks.
113  void run_body( Range &r ) {
115  fgt_alg_begin_body( tbb::internal::PARALLEL_FOR_TASK, (void *)const_cast<Body*>(&(this->my_body)), (void*)this );
116  my_body( r );
117  fgt_alg_end_body( (void *)const_cast<Body*>(&(this->my_body)) );
118  }
119 
    // Spawn right task, serves as callback for partitioner.
    // The new task is built with the splitting constructor in storage from allocate_sibling.
121  void offer_work(typename Partitioner::split_type& split_obj) {
122  spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, split_obj) );
123  }
    // Spawn right task, serves as callback for partitioner.
    // Same as above, but the new task receives the explicit range r and depth d.
125  void offer_work(const Range& r, depth_t d = 0) {
126  spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, r, d) );
127  }
128  };
129 
// Allocate right task with new parent.
// Creates a flag_task as the continuation of start_for_task, makes it the parent
// of start_for_task, and returns storage of the requested size for a second child.
// The caller constructs the sibling task in the returned storage.
131  // TODO: 'inline' here is to avoid multiple definition error but for sake of code size this should not be inlined
132  inline void* allocate_sibling(task* start_for_task, size_t bytes) {
    // New join node, allocated as the continuation of the current task.
133  task* parent_ptr = new( start_for_task->allocate_continuation() ) flag_task();
    // The current task becomes a child of the join node.
134  start_for_task->set_parent(parent_ptr);
    // Presumably 2 counts the two tasks attached to the join node here:
    // the current task and the sibling allocated on the next line - confirm.
135  parent_ptr->set_ref_count(2);
136  return &parent_ptr->allocate_child().allocate(bytes);
137  }
138 
// Execute task for parallel_for.
// NOTE(review): the declarator line between the template header and the body is
// missing from this listing; presumably start_for<Range,Body,Partitioner>::execute() - confirm.
140  template<typename Range, typename Body, typename Partitioner>
    // Both steps are delegated to the partition object, which receives this task and its range.
142  my_partition.check_being_stolen( *this );
143  my_partition.execute(*this, my_range);
    // Always returns NULL.
144  return NULL;
145  }
146 } // namespace internal
148 } // namespace interface9
149 
151 namespace internal {
153 
// Calls the function with values from range [begin, end) with a step provided.
// Adapts a function of one Index argument to a body callable on
// tbb::blocked_range<Index>: iteration number i is mapped to my_begin + i*my_step.
155  template<typename Function, typename Index>
156  class parallel_for_body : internal::no_assign {
    // Stored by reference, not copied.
157  const Function &my_func;
    // Value passed for iteration number 0.
158  const Index my_begin;
    // Distance between the values passed for consecutive iterations.
159  const Index my_step;
160  public:
    // _begin and _step are copied into the members; _func is only referenced.
161  parallel_for_body( const Function& _func, Index& _begin, Index& _step )
162  : my_func(_func), my_begin(_begin), my_step(_step) {}
163 
    // Invokes my_func once per iteration number in r, in increasing order.
164  void operator()( const tbb::blocked_range<Index>& r ) const {
165  // A set of local variables to help the compiler with vectorization of the following loop.
166  Index b = r.begin();
167  Index e = r.end();
168  Index ms = my_step;
    // Value for the first iteration of this sub-range; advanced by ms on every pass.
169  Index k = my_begin + b*ms;
170 
171 #if __INTEL_COMPILER
172 #pragma ivdep
173 #if __TBB_ASSERT_ON_VECTORIZATION_FAILURE
174 #pragma vector always assert
175 #endif
176 #endif
177  for ( Index i = b; i < e; ++i, k += ms ) {
178  my_func( k );
179  }
180  }
181  };
182 } // namespace internal
184 
185 // Requirements on Range concept are documented in blocked_range.h
186 
197 
199 
// Parallel iteration over range with default partitioner.
// NOTE(review): the body line of this overload is missing from the listing.
200 template<typename Range, typename Body>
201 void parallel_for( const Range& range, const Body& body ) {
203 }
204 
206 
// Parallel iteration over range with a simple partitioner.
// NOTE(review): the body line of this overload is missing from the listing.
207 template<typename Range, typename Body>
208 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
210 }
211 
213 
// Parallel iteration over range with an auto partitioner.
// NOTE(review): the body line of this overload is missing from the listing.
214 template<typename Range, typename Body>
215 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
217 }
218 
220 
// Parallel iteration over range with a static partitioner.
// NOTE(review): the body line of this overload is missing from the listing.
221 template<typename Range, typename Body>
222 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) {
224 }
225 
227 
// Parallel iteration over range with an affinity partitioner.
// Unlike the other partitioners, the affinity partitioner is taken by non-const reference.
// NOTE(review): the body line of this overload is missing from the listing.
228 template<typename Range, typename Body>
229 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
231 }
232 
233 #if __TBB_TASK_GROUP_CONTEXT
234 
// Parallel iteration over range with a user-supplied context and no explicit partitioner.
// NOTE(review): the body line of this overload is missing from the listing.
236 template<typename Range, typename Body>
237 void parallel_for( const Range& range, const Body& body, task_group_context& context ) {
239 }
240 
242 
// Parallel iteration over range with a simple partitioner and a user-supplied context.
// Forwards all arguments to start_for<Range,Body,const simple_partitioner>::run.
243 template<typename Range, typename Body>
244 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
245  internal::start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context);
246 }
247 
249 
// Parallel iteration over range with an auto partitioner and a user-supplied context.
// Forwards all arguments to start_for<Range,Body,const auto_partitioner>::run.
250 template<typename Range, typename Body>
251 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
252  internal::start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context);
253 }
254 
256 
// Parallel iteration over range with a static partitioner and a user-supplied context.
// Forwards all arguments to start_for<Range,Body,const static_partitioner>::run.
257 template<typename Range, typename Body>
258 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) {
259  internal::start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context);
260 }
261 
263 
// Parallel iteration over range with an affinity partitioner and a user-supplied context.
// The partitioner is taken by non-const reference and forwarded to
// start_for<Range,Body,affinity_partitioner>::run.
264 template<typename Range, typename Body>
265 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
266  internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context);
267 }
268 #endif /* __TBB_TASK_GROUP_CONTEXT */
269 
270 
// Overloads of parallel_for that iterate over integer values in [first, last)
// and call f(value) for each value, instead of taking a Range and a Body.
271 namespace strict_ppl {
272 
// Implementation of parallel iteration over stepped range of integers with explicit step and partitioner.
// - step <= 0: reports eid_nonpositive_step through internal::throw_exception.
// - last <= first: does nothing.
// - otherwise: runs tbb::parallel_for over iteration numbers [0, end), where
//   iteration number i calls f(first + i*step).
// NOTE(review): last - first - Index(1) is evaluated in Index; for a signed Index
// with widely separated bounds this may overflow - confirm the intended domain.
274 template <typename Index, typename Function, typename Partitioner>
276 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) {
277  if (step <= 0 )
278  internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
279  else if (last > first) {
280  // Above "else" avoids "potential divide by zero" warning on some platforms
    // Number of iterations: (last - first) divided by step, rounded up.
281  Index end = (last - first - Index(1)) / step + Index(1);
282  tbb::blocked_range<Index> range(static_cast<Index>(0), end);
283  internal::parallel_for_body<Function, Index> body(f, first, step);
284  tbb::parallel_for(range, body, partitioner);
285  }
286 }
287 
// Parallel iteration over a range of integers with a step provided and default partitioner.
// Partitioner is spelled out as const auto_partitioner so that the temporary
// can bind to the reference parameter of parallel_for_impl.
289 template <typename Index, typename Function>
290 void parallel_for(Index first, Index last, Index step, const Function& f) {
291  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner());
292 }
// Parallel iteration over a range of integers with a step provided and a simple partitioner.
294 template <typename Index, typename Function>
295 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) {
296  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner);
297 }
// Parallel iteration over a range of integers with a step provided and an auto partitioner.
299 template <typename Index, typename Function>
300 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) {
301  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner);
302 }
// Parallel iteration over a range of integers with a step provided and a static partitioner.
304 template <typename Index, typename Function>
305 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) {
306  parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner);
307 }
// Parallel iteration over a range of integers with a step provided and an affinity partitioner.
// The partitioner is a non-const reference, so the template arguments are deduced.
309 template <typename Index, typename Function>
310 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) {
311  parallel_for_impl(first, last, step, f, partitioner);
312 }
313 
// Parallel iteration over a range of integers with step 1 and default partitioner.
315 template <typename Index, typename Function>
316 void parallel_for(Index first, Index last, const Function& f) {
317  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner());
318 }
// Parallel iteration over a range of integers with step 1 and a simple partitioner.
320 template <typename Index, typename Function>
321 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) {
322  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
323 }
// Parallel iteration over a range of integers with step 1 and an auto partitioner.
325 template <typename Index, typename Function>
326 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) {
327  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
328 }
// Parallel iteration over a range of integers with step 1 and a static partitioner.
330 template <typename Index, typename Function>
331 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) {
332  parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
333 }
// Parallel iteration over a range of integers with step 1 and an affinity partitioner.
335 template <typename Index, typename Function>
336 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) {
337  parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner);
338 }
339 
340 #if __TBB_TASK_GROUP_CONTEXT
// Same as the parallel_for_impl overload without a context, except that the
// caller-supplied context is forwarded to tbb::parallel_for.
// NOTE(review): last - first - Index(1) is evaluated in Index; for a signed Index
// with widely separated bounds this may overflow - confirm the intended domain.
341 template <typename Index, typename Function, typename Partitioner>
343 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, tbb::task_group_context &context) {
344  if (step <= 0 )
345  internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
346  else if (last > first) {
347  // Above "else" avoids "potential divide by zero" warning on some platforms
    // Number of iterations: (last - first) divided by step, rounded up.
348  Index end = (last - first - Index(1)) / step + Index(1);
349  tbb::blocked_range<Index> range(static_cast<Index>(0), end);
350  internal::parallel_for_body<Function, Index> body(f, first, step);
351  tbb::parallel_for(range, body, partitioner, context);
352  }
353 }
354 
// Parallel iteration over a range of integers with a step provided, default partitioner and user-supplied context.
356 template <typename Index, typename Function>
357 void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) {
358  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context);
359 }
// Parallel iteration over a range of integers with a step provided, a simple partitioner and user-supplied context.
361  template <typename Index, typename Function>
362 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
363  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context);
364 }
// Parallel iteration over a range of integers with a step provided, an auto partitioner and user-supplied context.
366  template <typename Index, typename Function>
367 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
368  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context);
369 }
// Parallel iteration over a range of integers with a step provided, a static partitioner and user-supplied context.
371 template <typename Index, typename Function>
372 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
373  parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context);
374 }
// Parallel iteration over a range of integers with a step provided, an affinity partitioner and user-supplied context.
376  template <typename Index, typename Function>
377 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
378  parallel_for_impl(first, last, step, f, partitioner, context);
379 }
380 
381 
// Parallel iteration over a range of integers with step 1, default partitioner and user-supplied context.
383 template <typename Index, typename Function>
384 void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) {
385  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context);
386 }
// Parallel iteration over a range of integers with step 1, a simple partitioner and user-supplied context.
388  template <typename Index, typename Function>
389 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
390  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
391 }
// Parallel iteration over a range of integers with step 1, an auto partitioner and user-supplied context.
393  template <typename Index, typename Function>
394 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
395  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
396 }
// Parallel iteration over a range of integers with step 1, a static partitioner and user-supplied context.
398 template <typename Index, typename Function>
399 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
400  parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
401 }
// Parallel iteration over a range of integers with step 1, an affinity partitioner and user-supplied context.
403  template <typename Index, typename Function>
404 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
405  parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context);
406 }
407 
408 #endif /* __TBB_TASK_GROUP_CONTEXT */
409 
410 
411 } // namespace strict_ppl
412 
414 
415 } // namespace tbb
416 
417 #if TBB_PREVIEW_SERIAL_SUBSET
418 #define __TBB_NORMAL_EXECUTION
419 #include "../serial/tbb/parallel_for.h"
420 #undef __TBB_NORMAL_EXECUTION
421 #endif
422 
423 #endif /* __TBB_parallel_for_H */
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp end
const_iterator begin() const
Beginning of range.
Definition: blocked_range.h:73
#define __TBB_override
Definition: tbb_stddef.h:244
internal::allocate_child_proxy & allocate_child()
Returns proxy for overloaded new that allocates a child task of *this.
Definition: task.h:654
Calls the function with values from range [begin, end) with a step provided.
static void fgt_alg_begin_body(string_index, void *, void *)
void note_affinity(affinity_id id) __TBB_override
Update affinity info, if any.
An auto partitioner.
Definition: partitioner.h:614
A range over which to iterate.
Definition: blocked_range.h:49
virtual task * execute()=0
Should be overridden by derived classes.
auto first(Container &c) -> decltype(begin(c))
Used to form groups of tasks.
Definition: task.h:335
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
static void spawn_root_and_wait(task &root)
Spawn task allocated by allocate_root, wait for it to complete, and deallocate it.
Definition: task.h:781
void * allocate_sibling(task *start_for_task, size_t bytes)
allocate right task with new parent
Base class for user-defined tasks.
Definition: task.h:592
void set_parent(task *p)
sets parent task pointer to specified value
Definition: task.h:833
static void fgt_end_algorithm(void *)
void offer_work(const Range &r, depth_t d=0)
spawn right task, serves as callback for partitioner
void throw_exception(exception_id eid)
Versionless convenience wrapper for throw_exception_v4()
static void fgt_begin_algorithm(string_index, void *)
internal::affinity_id affinity_id
An id as used for specifying affinity.
Definition: task.h:879
auto last(Container &c) -> decltype(begin(c))
start_for(start_for &parent_, typename Partitioner::split_type &split_obj)
Splitting constructor used to generate children.
internal::allocate_continuation_proxy & allocate_continuation()
Returns proxy for overloaded new that allocates a continuation task of *this.
Definition: task.h:649
const_iterator end() const
One past last value in range.
Definition: blocked_range.h:76
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task * task
Partitioner::task_partition_type my_partition
void parallel_for(Index first, Index last, Index step, const Function &f)
Parallel iteration over a range of integers with a step provided and default partitioner.
The graph class.
Dummy type that distinguishes splitting constructor from copy constructor.
Definition: tbb_stddef.h:399
task * execute() __TBB_override
execute task for parallel_for
Task type used in parallel_for.
An affinity partitioner.
Definition: partitioner.h:652
void parallel_for(const Range &range, const Body &body)
Parallel iteration over range with default partitioner.
static void run(const Range &range, const Body &body, Partitioner &partitioner)
static void fgt_alg_end_body(void *)
#define __TBB_DEFAULT_PARTITIONER
Definition: tbb_config.h:597
void operator()(const tbb::blocked_range< Index > &r) const
static void run(const Range &range, const Body &body, Partitioner &partitioner, task_group_context &context)
void set_ref_count(int count)
Set reference count.
Definition: task.h:734
parallel_for_body(const Function &_func, Index &_begin, Index &_step)
void offer_work(typename Partitioner::split_type &split_obj)
spawn right task, serves as callback for partitioner
task_group_context * context()
This method is deprecated and will be removed in the future.
Definition: task.h:843
static internal::allocate_root_proxy allocate_root()
Returns proxy for overloaded new that allocates a root task.
Definition: task.h:636
static void fgt_algorithm(string_index, void *, void *)
Join task node that contains shared flag for stealing feedback.
Definition: partitioner.h:129
void parallel_for_impl(Index first, Index last, Index step, const Function &f, Partitioner &partitioner)
Implementation of parallel iteration over stepped range of integers with explicit step and partitioner.
start_for(start_for &parent_, const Range &r, depth_t d)
Construct right child from the given range as response to the demand.
void run_body(Range &r)
Run body for range, serves as callback for partitioner.
start_for(const Range &range, const Body &body, Partitioner &partitioner)
Constructor for root task.
A static partitioner.
Definition: partitioner.h:633
A simple partitioner.
Definition: partitioner.h:587

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.