Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
arena.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16 
17 
18 
19 */
20 
21 #ifndef _TBB_arena_H
22 #define _TBB_arena_H
23 
24 #include "tbb/tbb_stddef.h"
25 #include "tbb/atomic.h"
26 
27 #include "tbb/tbb_machine.h"
28 
29 #include "scheduler_common.h"
30 #include "intrusive_list.h"
31 #if __TBB_PREVIEW_CRITICAL_TASKS && __TBB_CPF_BUILD
32 #include "task_stream_extended.h"
33 #else
34 #include "task_stream.h"
35 #endif
36 #include "../rml/include/rml_tbb.h"
37 #include "mailbox.h"
38 #include "observer_proxy.h"
39 #include "market.h"
40 #include "governor.h"
41 #include "concurrent_monitor.h"
42 
43 namespace tbb {
44 
45 class task_group_context;
46 class allocate_root_with_context_proxy;
47 
48 namespace internal {
49 
51 
53 struct arena_base : padded<intrusive_list_node> {
55  unsigned my_num_workers_allotted; // heavy use in stealing loop
56 
58 
61  atomic<unsigned> my_references; // heavy use in stealing loop
62 
63 #if __TBB_TASK_PRIORITY
64  volatile intptr_t my_top_priority; // heavy use in stealing loop
66 #endif /* !__TBB_TASK_PRIORITY */
67 
69  atomic<unsigned> my_limit; // heavy use in stealing loop
70 
72 
77 #if __TBB_PREVIEW_CRITICAL_TASKS && __TBB_CPF_BUILD
79 #else
80  task_stream<num_priority_levels> my_task_stream; // heavy use in stealing loop
81 #endif
82 
83 #if __TBB_PREVIEW_CRITICAL_TASKS
84 
88  // used on the hot path of the task dispatch loop
89  task_stream<1, back_nonnull_accessor> my_critical_task_stream;
90 #endif
91 
93  unsigned my_max_num_workers;
94
96  int my_num_workers_requested;
97
99 
103  tbb::atomic<uintptr_t> my_pool_state;
104
105 #if __TBB_ARENA_OBSERVER
106  observer_list my_observers;
108 #endif
109 
110 #if __TBB_TASK_PRIORITY
111  intptr_t my_bottom_priority;
113 
115 
117  uintptr_t my_reload_epoch;
118 
120  task* my_orphaned_tasks;
121 
123  tbb::atomic<uintptr_t> my_abandonment_epoch;
124 
126 
129  tbb::atomic<intptr_t> my_skipped_fifo_priority;
130 #endif /* !__TBB_TASK_PRIORITY */
131 
132  // Below are rarely modified members
133 
135  market* my_market;
136
138  uintptr_t my_aba_epoch;
139 
140 #if !__TBB_FP_CONTEXT
142  cpu_ctl_env my_cpu_ctl_env;
143 #endif
144 
145 #if __TBB_TASK_GROUP_CONTEXT
146 
149  task_group_context* my_default_ctx;
150 #endif /* __TBB_TASK_GROUP_CONTEXT */
151 
153  unsigned my_num_slots;
154 
156  unsigned my_num_reserved_slots;
157
158 #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
159  enum concurrency_mode {
161  cm_normal = 0, // arena is served by workers as usual
162  cm_enforced_local, // arena needs an extra worker despite the arena limit
163  cm_enforced_global // arena needs an extra worker despite a global limit
164  };
165 
167  concurrency_mode my_concurrency_mode;
168 #endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
169 
171  concurrent_monitor my_exit_monitors;
172
173 #if TBB_USE_ASSERT
174  uintptr_t my_guard;
176 #endif /* TBB_USE_ASSERT */
177 }; // struct arena_base
178 
179 class arena: public padded<arena_base>
180 {
183 public:
184  typedef padded<arena_base> base_type;
185
187  enum new_work_type {
188  work_spawned,
189  wakeup,
190  work_enqueued
191  };
192 
194  arena ( market&, unsigned max_num_workers, unsigned num_reserved_slots );
195 
197  static arena& allocate_arena( market&, unsigned num_slots, unsigned num_reserved_slots );
198 
199  static int unsigned num_arena_slots ( unsigned num_slots ) {
200  return max(2u, num_slots);
201  }
202 
203  static int allocation_size ( unsigned num_slots ) {
204  return sizeof(base_type) + num_slots * (sizeof(mail_outbox) + sizeof(arena_slot));
205  }
206
208  mail_outbox& mailbox( affinity_id id ) {
209  __TBB_ASSERT( 0<id, "affinity id must be positive integer" );
210  __TBB_ASSERT( id <= my_num_slots, "affinity id out of bounds" );
211 
212  return ((mail_outbox*)this)[-(int)id];
213  }
214 
216  void free_arena ();
217 
218  typedef uintptr_t pool_state_t;
219 
221  static const pool_state_t SNAPSHOT_EMPTY = 0;
222 
224  static const pool_state_t SNAPSHOT_FULL = pool_state_t(-1);
225
227  static const unsigned ref_external_bits = 12; // up to 4095 external and 1M workers
228 
230  static const unsigned ref_external = 1;
231  static const unsigned ref_worker = 1<<ref_external_bits;
232 
234  static bool is_busy_or_empty( pool_state_t s ) { return s < SNAPSHOT_FULL; }
235 
237  unsigned num_workers_active( ) {
238  return my_references >> ref_external_bits;
239  }
240 
242  template<arena::new_work_type work_type> void advertise_new_work();
243 
245 
246  bool is_out_of_work();
247 
249  void enqueue_task( task&, intptr_t, FastRandom & );
250 
252  void process( generic_scheduler& );
253 
255  template<unsigned ref_param>
256  inline void on_thread_leaving ( );
257 
258 #if __TBB_STATISTICS
259  void dump_arena_statistics ();
261 #endif /* __TBB_STATISTICS */
262 
263 #if __TBB_TASK_PRIORITY
264 
266  inline bool may_have_tasks ( generic_scheduler*, bool& tasks_present, bool& dequeuing_possible );
267 
269  void orphan_offloaded_tasks ( generic_scheduler& s );
270 #endif /* __TBB_TASK_PRIORITY */
271 
272 #if __TBB_COUNT_TASK_NODES
273  intptr_t workers_task_node_count();
275 #endif
276 
278  bool has_enqueued_tasks();
279 
280 #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
281  bool recall_by_mandatory_request() const {
283  return my_market->my_mandatory_num_requested && my_concurrency_mode==cm_normal;
284  }
285 
287  bool must_have_concurrency() const {
288  return my_num_workers_requested &&
289  ( my_concurrency_mode==cm_enforced_local || my_concurrency_mode==cm_enforced_global );
290  }
291 #endif
292  static const size_t out_of_arena = ~size_t(0);
294  template <bool as_worker>
295  size_t occupy_free_slot( generic_scheduler& s );
297  size_t occupy_free_slot_in_range( generic_scheduler& s, size_t lower, size_t upper );
298
300  arena_slot my_slots[1];
301 }; // class arena
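The negative indexing in mailbox() and the sizing in allocation_size() imply a specific memory layout: the array of mail_outbox objects is placed immediately before the arena object, and the array of arena_slot objects immediately after it. Below is a minimal standalone sketch of that layout; the *_stub types, the plain malloc, and main() are illustrative stand-ins, not the real mail_outbox/arena_slot types or the cache-aligned allocation performed by allocate_arena().

    // Illustrative sketch only: models the layout implied by allocation_size() and mailbox().
    #include <cstddef>
    #include <cstdlib>
    #include <cstring>
    #include <new>

    struct mail_outbox_stub { void* my_first; };   // stand-in for mail_outbox
    struct arena_slot_stub  { void* task_pool; };  // stand-in for arena_slot
    struct arena_stub {                            // stand-in for padded<arena_base>
        unsigned my_num_slots;
        // Mailboxes for affinity ids 1..my_num_slots live immediately *before* the arena:
        mail_outbox_stub& mailbox( unsigned short id ) { return ((mail_outbox_stub*)this)[-(int)id]; }
        // Slots live immediately *after* the arena header:
        arena_slot_stub* slots() { return (arena_slot_stub*)(this + 1); }
    };

    int main() {
        const unsigned num_slots = 4;
        const size_t bytes = sizeof(arena_stub)
                           + num_slots * (sizeof(mail_outbox_stub) + sizeof(arena_slot_stub));
        char* storage = (char*)std::malloc( bytes );
        std::memset( storage, 0, bytes );
        // As in allocate_arena(), the arena header is constructed just past the mailbox array.
        arena_stub* a = new( storage + num_slots * sizeof(mail_outbox_stub) ) arena_stub();
        a->my_num_slots = num_slots;
        mail_outbox_stub& mb = a->mailbox( 1 );    // the mailbox adjacent to the arena header
        arena_slot_stub*  s0 = a->slots();         // the first slot, right after the header
        (void)mb; (void)s0;
        std::free( storage );
        return 0;
    }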
302 
303 template<unsigned ref_param>
304 inline void arena::on_thread_leaving ( ) {
305  //
306  // The implementation of the arena destruction synchronization logic has contained
307  // various bugs/flaws at different stages of its evolution, so below is a
308  // detailed description of the issues taken into account in the current
309  // design.
310  //
311  // When fire-and-forget tasks (scheduled via task::enqueue()) are used, the
312  // master thread is allowed to leave its arena before all its work is executed,
313  // and the market may temporarily revoke all workers from this arena. Since revoked
314  // workers never attempt to reset the arena state to EMPTY and cancel its request
315  // to RML for threads, the arena object is destroyed only when both the last
316  // thread is leaving it and the arena's state is EMPTY (that is, its master thread
317  // has left and it does not contain any work).
318  // Thus resetting the arena to the EMPTY state (as earlier TBB versions did) should
319  // not be done here (or anywhere else in the master thread, for that matter); doing
320  // so can result either in the arena's premature destruction (at least without
321  // additional costly checks in workers) or in unnecessary arena state changes
322  // (and ensuing worker migration).
323  //
324  // A worker that checks for work presence and transitions the arena to the EMPTY
325  // state (in the snapshot-taking procedure arena::is_out_of_work()) updates
326  // arena::my_pool_state first, and only then arena::my_num_workers_requested.
327  // So the check for work absence must be done against the latter field.
328  //
329  // In the time window between decrementing the active threads count and checking
330  // for an outstanding request for workers, a new worker thread may arrive, finish
331  // the remaining work, set the arena state to empty, and leave, decrementing its
332  // refcount and destroying the arena. The current thread would then destroy the
333  // arena a second time. To preclude this, a local copy of the outstanding request
334  // value can be stored before decrementing the active threads count.
335  //
336  // But this technique may cause two other problems. When the stored request is
337  // zero, it is possible that the arena still has threads, and they can generate new
338  // tasks and thus re-establish non-zero requests. Then all the threads can be
339  // revoked (as described above), leaving this thread the last one and causing
340  // it to destroy a non-empty arena.
341  //
342  // The other problem occurs when the stored request is non-zero. Another
343  // thread may complete the work, set the arena state to empty, and leave without
344  // destroying the arena before this thread decrements the refcount. This thread
345  // cannot destroy the arena either. Thus the arena may be "orphaned".
346  //
347  // In both cases we cannot dereference the arena pointer after the refcount is
348  // decremented, as our arena may already be destroyed.
349  //
350  // If this is the master thread, the market is kept alive by the reference the
351  // master holds to it. In the case of workers, the market's liveness is ensured by
352  // the RML connection rundown protocol, according to which the client (i.e. the
353  // market) lives until the RML server notifies it about connection termination, and
354  // this notification is fired only after all workers return to RML.
355  //
356  // Thus, if we have decremented the refcount to zero, we ask the market to check the
357  // arena state (including whether the arena is still alive) under the lock.
358  //
359  uintptr_t aba_epoch = my_aba_epoch;
360  market* m = my_market;
361  __TBB_ASSERT(my_references >= ref_param, "broken arena reference counter");
362 #if __TBB_STATISTICS_EARLY_DUMP
363  // While still holding a reference to the arena, compute how many external references are left.
364  // If just one, dump statistics.
365  if ( modulo_power_of_two(my_references,ref_worker)==ref_param ) // may only be true with ref_external
366  GATHER_STATISTIC( dump_arena_statistics() );
367 #endif
368 #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
369  // When there are no workers, someone must free the arena, as
370  // without workers no one calls is_out_of_work().
371  // Skip workerless arenas because they have no demand for workers.
372  // TODO: consider stricter conditions for the cleanup,
373  // because it can create demand for workers,
374  // but the arena may already be empty (and so ready for destruction).
375  if( ref_param==ref_external && my_num_slots != my_num_reserved_slots
376  && 0 == m->my_num_workers_soft_limit && my_concurrency_mode==cm_normal ) {
377  bool is_out = false;
378  for (int i=0; i<num_priority_levels; i++) {
379  is_out = is_out_of_work();
380  if (is_out)
381  break;
382  }
383  // We expect that in the worst case it is enough to have num_priority_levels-1
384  // calls to restore priorities and yet another is_out_of_work() to confirm
385  // that no work was found. But as market::set_active_num_workers() can be called
386  // concurrently, we cannot guarantee that the last is_out_of_work() returns true.
387  }
388 #endif
389  if ( (my_references -= ref_param ) == 0 )
390  m->try_destroy_arena( this, aba_epoch );
391 }
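For illustration, the packed reference counter that on_thread_leaving() and num_workers_active() rely on can be modeled in isolation. The sketch below only mirrors the constants declared in this header (ref_external_bits, ref_external, ref_worker); the test scaffolding around them is hypothetical.

    // Illustrative sketch only: external references occupy the low ref_external_bits bits,
    // worker references are counted above them.
    #include <cassert>
    #include <cstdint>

    static const unsigned ref_external_bits = 12;                       // as in arena.h
    static const unsigned ref_external      = 1;                        // one master/external reference
    static const unsigned ref_worker        = 1u << ref_external_bits;  // one worker reference

    int main() {
        uint32_t refs = 0;                 // stands in for arena_base::my_references
        refs += ref_external;              // a master joins the arena
        refs += 3 * ref_worker;            // three workers join
        // num_workers_active(): workers are counted above the external bits.
        assert( (refs >> ref_external_bits) == 3 );
        // modulo_power_of_two(my_references, ref_worker): external references only.
        assert( (refs & (ref_worker - 1)) == 1 );
        // on_thread_leaving<ref_worker>() / <ref_external>() subtract the same increments;
        // the arena may be reclaimed only once the combined counter reaches zero.
        refs -= 3 * ref_worker;
        refs -= ref_external;
        assert( refs == 0 );
        return 0;
    }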
392 
393 template<arena::new_work_type work_type> void arena::advertise_new_work() {
394  if( work_type == work_enqueued ) {
395 #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
397  if( my_concurrency_mode!=cm_enforced_global ) {
398  if( my_market->mandatory_concurrency_enable( this ) ) {
400  return;
401  }
402  }
403  } else if( my_max_num_workers==0 && my_num_reserved_slots==1 ) {
404  my_max_num_workers = 1;
405  __TBB_ASSERT(my_concurrency_mode==cm_normal, NULL);
406  my_concurrency_mode = cm_enforced_local;
408  my_market->adjust_demand( *this, 1 );
409  return;
410  }
411 #endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
412  // A local memory fence here and below is required to avoid missed wakeups; see the comment below.
413  // Starvation-resistant tasks require concurrency, so missed wakeups are unacceptable.
414  atomic_fence();
415  }
416  else if( work_type == wakeup ) {
417  __TBB_ASSERT(my_max_num_workers!=0, "Unexpected worker wakeup request");
418  atomic_fence();
419  }
420  // Double-check idiom that, in case of spawning, is deliberately sloppy about memory fences.
421  // Technically, to avoid missed wakeups, there should be a full memory fence between the point we
422  // released the task pool (i.e. spawned task) and read the arena's state. However, adding such a
423  // fence might hurt overall performance more than it helps, because the fence would be executed
424  // on every task pool release, even when stealing does not occur. Since TBB allows parallelism,
425  // but never promises parallelism, the missed wakeup is not a correctness problem.
426  pool_state_t snapshot = my_pool_state;
427  if( is_busy_or_empty(snapshot) ) {
428  // Attempt to mark as full. The compare_and_swap below is a little unusual because the
429  // result is compared to a value that can be different than the comparand argument.
430  if( my_pool_state.compare_and_swap( SNAPSHOT_FULL, snapshot )==SNAPSHOT_EMPTY ) {
431  if( snapshot!=SNAPSHOT_EMPTY ) {
432  // This thread read "busy" into snapshot, and then another thread transitioned
433  // my_pool_state to "empty" in the meantime, which caused the compare_and_swap above
434  // to fail. Attempt to transition my_pool_state from "empty" to "full".
435  if( my_pool_state.compare_and_swap( SNAPSHOT_FULL, SNAPSHOT_EMPTY )!=SNAPSHOT_EMPTY ) {
436  // Some other thread transitioned my_pool_state from "empty", and hence became
437  // responsible for waking up workers.
438  return;
439  }
440  }
441  // This thread transitioned pool from empty to full state, and thus is responsible for
442  // telling the market that there is work to do.
443 #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
444  if( work_type == work_spawned ) {
445  if( my_concurrency_mode!=cm_normal ) {
446  switch( my_concurrency_mode ) {
447  case cm_enforced_local:
449  __TBB_ASSERT(!governor::local_scheduler()->is_worker(), "");
450  // There was deliberate oversubscription on 1 core for the sake of starvation-resistant tasks.
451  // Now a single active thread (must be the master) supposedly starts a new parallel region
452  // with relaxed sequential semantics, and oversubscription should be avoided.
453  // Demand for workers has been decreased to 0 during SNAPSHOT_EMPTY, so just keep it.
454  my_max_num_workers = 0;
455  my_concurrency_mode = cm_normal;
456  break;
457  case cm_enforced_global:
458  my_market->mandatory_concurrency_disable( this );
460  break;
461  default:
462  break;
463  }
464  return;
465  }
466  }
467 #endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
468  // TODO: investigate adjusting of arena's demand by a single worker.
469  my_market->adjust_demand( *this, my_max_num_workers );
470  }
471  }
472 }
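The empty/busy/full protocol described in the comments above can also be modeled outside TBB. The sketch below uses std::atomic in place of tbb::atomic and a plain counter in place of my_market->adjust_demand(), so it illustrates only the double-check idiom, not the real market interaction.

    // Illustrative sketch only: the SNAPSHOT_EMPTY / "busy" / SNAPSHOT_FULL double-check idiom.
    #include <atomic>
    #include <cassert>
    #include <cstdint>

    typedef uintptr_t pool_state_t;
    static const pool_state_t SNAPSHOT_EMPTY = 0;
    static const pool_state_t SNAPSHOT_FULL  = pool_state_t(-1);

    static std::atomic<pool_state_t> pool_state( SNAPSHOT_EMPTY );
    static int wakeups_requested = 0;   // stands in for asking the market for workers

    void advertise_new_work_sketch() {
        pool_state_t snapshot = pool_state.load();
        if( snapshot < SNAPSHOT_FULL ) {             // is_busy_or_empty(snapshot)
            // Attempt to mark as full. Like tbb's compare_and_swap, we need the value the
            // pool had before the attempt; compare_exchange_strong stores it in 'prev' on
            // failure and leaves it equal to 'snapshot' on success.
            pool_state_t prev = snapshot;
            pool_state.compare_exchange_strong( prev, SNAPSHOT_FULL );
            if( prev == SNAPSHOT_EMPTY ) {
                if( snapshot != SNAPSHOT_EMPTY ) {
                    // We read "busy", but the pool became "empty" meanwhile: retry empty -> full.
                    pool_state_t expected = SNAPSHOT_EMPTY;
                    if( !pool_state.compare_exchange_strong( expected, SNAPSHOT_FULL ) )
                        return;                      // another thread owns the empty -> full transition
                }
                // This thread transitioned the pool from empty to full: request workers.
                ++wakeups_requested;                 // ~ my_market->adjust_demand( *this, my_max_num_workers )
            }
        }
    }

    int main() {
        advertise_new_work_sketch();                 // empty -> full: one request is issued
        assert( pool_state.load() == SNAPSHOT_FULL && wakeups_requested == 1 );
        advertise_new_work_sketch();                 // already full: no further request
        assert( wakeups_requested == 1 );
        return 0;
    }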
473 
474 } // namespace internal
475 } // namespace tbb
476 
477 #endif /* _TBB_arena_H */
The container for "fairness-oriented" aka "enqueued" tasks.
Definition: task_stream.h:73
void free_arena()
Completes arena shutdown, destructs and deallocates it.
Definition: arena.cpp:253
static int unsigned num_arena_slots(unsigned num_slots)
Definition: arena.h:199
static int allocation_size(unsigned num_slots)
Definition: arena.h:203
static const unsigned ref_worker
Definition: arena.h:231
unsigned my_num_workers_allotted
The number of workers that have been marked out by the resource manager to service the arena.
Definition: arena.h:55
unsigned short affinity_id
An id as used for specifying affinity.
Definition: task.h:124
market * my_market
The market that owns this arena.
Definition: arena.h:135
unsigned my_num_reserved_slots
The number of reserved slots (can be occupied only by masters).
Definition: arena.h:156
static generic_scheduler * local_scheduler()
Obtain the thread-local instance of the TBB scheduler.
Definition: governor.h:126
argument_integer_type modulo_power_of_two(argument_integer_type arg, divisor_integer_type divisor)
A function to compute arg modulo divisor where divisor is a power of 2.
Definition: tbb_stddef.h:365
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:169
void adjust_demand(arena &, int delta)
Request that arena's need in workers should be adjusted.
Definition: market.cpp:590
size_t occupy_free_slot(generic_scheduler &s)
Tries to occupy a slot in the arena. On success, returns the slot index; if no slot is available,...
Definition: arena.cpp:90
static const size_t out_of_arena
Definition: arena.h:292
cpu_ctl_env my_cpu_ctl_env
FPU control settings of arena's master thread captured at the moment of arena instantiation.
Definition: arena.h:142
static const pool_state_t SNAPSHOT_FULL
At least one task has been offered for stealing since the last snapshot started.
Definition: arena.h:224
void on_thread_leaving()
Notification that worker or master leaves its arena.
Definition: arena.h:304
Used to form groups of tasks.
Definition: task.h:335
A fast random number generator.
Definition: tbb_misc.h:132
The structure of an arena, except the array of slots.
Definition: arena.h:53
void enqueue_task(task &, intptr_t, FastRandom &)
enqueue a task into starvation-resistance queue
Definition: arena.cpp:558
Base class for user-defined tasks.
Definition: task.h:592
Work stealing task scheduler.
Definition: scheduler.h:124
static const unsigned ref_external
Reference increment values for externals and workers.
Definition: arena.h:230
unsigned my_num_slots
The number of slots in the arena.
Definition: arena.h:153
arena(market &, unsigned max_num_workers, unsigned num_reserved_slots)
Constructor.
Definition: arena.cpp:190
void atomic_fence()
Sequentially consistent full memory fence.
Definition: tbb_machine.h:343
arena_slot my_slots[1]
Definition: arena.h:300
padded< arena_base > base_type
Definition: arena.h:184
size_t occupy_free_slot_in_range(generic_scheduler &s, size_t lower, size_t upper)
Tries to occupy a slot in the specified range.
Definition: arena.cpp:75
tbb::atomic< uintptr_t > my_pool_state
Current task pool state and estimate of available tasks amount.
Definition: arena.h:103
T max(const T &val1, const T &val2)
Utility template function returning greater of the two values.
Definition: tbb_misc.h:116
atomic< unsigned > my_references
Reference counter for the arena.
Definition: arena.h:61
bool is_out_of_work()
Check if there is job anywhere in arena.
Definition: arena.cpp:407
unsigned my_max_num_workers
The number of workers requested by the master thread owning the arena.
Definition: arena.h:93
static const pool_state_t SNAPSHOT_EMPTY
No tasks to steal since last snapshot was taken.
Definition: arena.h:221
static const intptr_t num_priority_levels
int my_num_workers_requested
The number of workers that are currently requested from the resource manager.
Definition: arena.h:96
#define GATHER_STATISTIC(x)
static arena & allocate_arena(market &, unsigned num_slots, unsigned num_reserved_slots)
Allocate an instance of arena.
Definition: arena.cpp:242
bool has_enqueued_tasks()
Check for the presence of enqueued tasks at all priority levels.
Definition: arena.cpp:379
void restore_priority_if_need()
If enqueued tasks found, restore arena priority and task presence status.
Definition: arena.cpp:387
atomic< unsigned > my_limit
The maximal number of currently busy slots.
Definition: arena.h:69
uintptr_t my_aba_epoch
ABA prevention marker.
Definition: arena.h:138
static const unsigned ref_external_bits
The number of least significant bits for external references.
Definition: arena.h:227
mail_outbox & mailbox(affinity_id id)
Get reference to mailbox corresponding to given affinity_id.
Definition: arena.h:208
concurrent_monitor my_exit_monitors
Waiting object for master threads that cannot join the arena.
Definition: arena.h:171
value_type compare_and_swap(value_type value, value_type comparand)
Definition: atomic.h:289
task_stream< num_priority_levels > my_task_stream
Task pool for the tasks scheduled via task::enqueue() method.
Definition: arena.h:80
void try_destroy_arena(arena *, uintptr_t aba_epoch)
Removes the arena from the market's list.
Definition: market.cpp:322
static bool is_busy_or_empty(pool_state_t s)
No tasks to steal or snapshot is being taken.
Definition: arena.h:234
Class representing where mail is put.
Definition: mailbox.h:100
unsigned num_workers_active()
The number of workers active in the arena.
Definition: arena.h:237
void advertise_new_work()
If necessary, raise a flag that there is new job in arena.
Definition: arena.h:393
unsigned my_num_workers_soft_limit
Current application-imposed limit on the number of workers (see set_active_num_workers())
Definition: market.h:82
void process(generic_scheduler &)
Registers the worker with the arena and enters TBB scheduler dispatch loop.
Definition: arena.cpp:106
new_work_type
Types of work advertised by advertise_new_work()
Definition: arena.h:187
uintptr_t pool_state_t
Definition: arena.h:218
Pads type T to fill out to a multiple of cache line size.
Definition: tbb_stddef.h:265

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.