Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
tbb_bind.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2020 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
17 #include "../tbb/tbb_assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here.
18 #include "tbb/tbb_stddef.h"
19 
20 #if _MSC_VER && !__INTEL_COMPILER
21 #pragma warning( push )
22 #pragma warning( disable : 4100 )
23 #endif
24 #include <hwloc.h>
25 #if _MSC_VER && !__INTEL_COMPILER
26 #pragma warning( pop )
27 #endif
28 
29 #include <vector>
30 
// Most hwloc calls return a negative exit code on error.
// This macro asserts that the wrapped hwloc call succeeded.
// NOTE(review): __TBB_ASSERT_EX presumably evaluates its argument even when
// assertions are compiled out, so the hwloc call itself is not lost in
// release builds — confirm against tbb_stddef.h.
#define assertion_hwloc_wrapper(command, ...) \
    __TBB_ASSERT_EX( (command(__VA_ARGS__)) >= 0, "Error occurred during call to hwloc API.");
35 
36 namespace tbb {
37 namespace internal {
38 
39 //------------------------------------------------------------------------
// Information about the machine's hardware that TBB happens to be running on
41 //------------------------------------------------------------------------
43  friend class numa_affinity_handler;
44 
45  static hwloc_topology_t topology;
46  static hwloc_cpuset_t process_cpu_affinity_mask;
47  static hwloc_nodeset_t process_node_affinity_mask;
48  static std::vector<hwloc_cpuset_t> affinity_masks_list;
49 
50  static std::vector<int> default_concurrency_list;
51  static std::vector<int> numa_indexes_list;
52  static int numa_nodes_count;
53 
56 
    // Binding threads to NUMA nodes located in other Windows processor groups
    // is allowed only if the machine topology contains several Windows
    // processor groups and the process affinity mask wasn't limited manually
    // (an affinity mask cannot violate processor group boundaries).
61  static bool intergroup_binding_allowed(size_t groups_num) { return groups_num > 1; }
62 
63 public:
64  typedef hwloc_cpuset_t affinity_mask;
65  typedef hwloc_const_cpuset_t const_affinity_mask;
66 
68 
69  static void initialize( size_t groups_num ) {
71  return;
73 
74  // Parse topology
75  if ( hwloc_topology_init( &topology ) == 0 ) {
77  if ( hwloc_topology_load( topology ) == 0 ) {
79  }
80  }
81 
82  // Fill parameters with stubs if topology parsing is broken.
85  hwloc_topology_destroy(topology);
86  }
87  numa_nodes_count = 1;
88  numa_indexes_list.push_back(-1);
89  default_concurrency_list.push_back(-1);
90  return;
91  }
92 
93  // Getting process affinity mask
94  if ( intergroup_binding_allowed(groups_num) ) {
95  process_cpu_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology));
96  process_node_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology));
97  } else {
98  process_cpu_affinity_mask = hwloc_bitmap_alloc();
99  process_node_affinity_mask = hwloc_bitmap_alloc();
100 
103  }
104 
105  // If system contains no NUMA nodes, HWLOC 1.11 returns an infinitely filled bitmap.
106  // hwloc_bitmap_weight() returns negative value for such bitmaps, so we use this check
107  // to change way of topology initialization.
108  if (hwloc_bitmap_weight(process_node_affinity_mask) < 0) {
109  numa_nodes_count = 1;
110  numa_indexes_list.push_back(0);
111  default_concurrency_list.push_back(hwloc_bitmap_weight(process_cpu_affinity_mask));
112 
113  affinity_masks_list.push_back(hwloc_bitmap_dup(process_cpu_affinity_mask));
115  return;
116  }
117 
118  // Get number of available NUMA nodes
119  numa_nodes_count = hwloc_bitmap_weight(process_node_affinity_mask);
120  __TBB_ASSERT(numa_nodes_count > 0, "Any system must contain one or more NUMA nodes");
121 
122  // Get NUMA logical indexes list
123  unsigned counter = 0;
124  int i = 0;
125  int max_numa_index = -1;
127  hwloc_obj_t node_buffer;
128  hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
129  node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
130  numa_indexes_list[counter] = static_cast<int>(node_buffer->logical_index);
131 
132  if ( numa_indexes_list[counter] > max_numa_index ) {
133  max_numa_index = numa_indexes_list[counter];
134  }
135 
136  counter++;
137  } hwloc_bitmap_foreach_end();
138  __TBB_ASSERT(max_numa_index >= 0, "Maximal NUMA index must not be negative");
139 
140  // Fill concurrency and affinity masks lists
141  default_concurrency_list.resize(max_numa_index + 1);
142  affinity_masks_list.resize(max_numa_index + 1);
143 
144  int index = 0;
145  hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
146  node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
147  index = static_cast<int>(node_buffer->logical_index);
148 
149  hwloc_cpuset_t& current_mask = affinity_masks_list[index];
150  current_mask = hwloc_bitmap_dup(node_buffer->cpuset);
151 
152  hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
153  __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask), "hwloc detected unavailable NUMA node");
154  default_concurrency_list[index] = hwloc_bitmap_weight(current_mask);
155  } hwloc_bitmap_foreach_end();
157  }
158 
160  if ( is_topology_parsed() ) {
161  for (int i = 0; i < numa_nodes_count; i++) {
162  hwloc_bitmap_free(affinity_masks_list[numa_indexes_list[i]]);
163  }
164  hwloc_bitmap_free(process_node_affinity_mask);
165  hwloc_bitmap_free(process_cpu_affinity_mask);
166  }
167 
169  hwloc_topology_destroy(topology);
170  }
171 
173  }
174 
175  static void fill(int& nodes_count, int*& indexes_list, int*& concurrency_list ) {
176  __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
177  nodes_count = numa_nodes_count;
178  indexes_list = &numa_indexes_list.front();
179  concurrency_list = &default_concurrency_list.front();
180  }
181 
183  __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
184  return hwloc_bitmap_dup(process_cpu_affinity_mask);
185  }
186 
    // Release an hwloc affinity mask previously duplicated/allocated by this
    // class (e.g. via allocate_process_affinity_mask()).
    static void free_affinity_mask( affinity_mask mask_to_free ) {
        hwloc_bitmap_free(mask_to_free); // If bitmap is NULL, no operation is performed.
    }
190 
    // Query the calling thread's current CPU binding into `current_mask`
    // (an hwloc bitmap allocated by the caller).
    static void store_current_affinity_mask( affinity_mask current_mask ) {
        assertion_hwloc_wrapper(hwloc_get_cpubind, topology, current_mask, HWLOC_CPUBIND_THREAD);

        // Clip the result to the process affinity mask; the thread must be
        // runnable on at least one CPU available to the process.
        hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
        __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask),
                     "Current affinity mask must intersects with process affinity mask");
    }
198 
    // Bind the calling thread to the CPUs contained in `new_mask`.
    static void set_new_affinity_mask( const_affinity_mask new_mask ) {
        assertion_hwloc_wrapper(hwloc_set_cpubind, topology, new_mask, HWLOC_CPUBIND_THREAD);
    }
202 
203  static const_affinity_mask get_node_affinity_mask( int node_index ) {
204  __TBB_ASSERT((int)affinity_masks_list.size() > node_index,
205  "Trying to get affinity mask for uninitialized NUMA node");
206  return affinity_masks_list[node_index];
207  }
208 };
209 
// Out-of-line definitions of platform_topology's static data members.
hwloc_topology_t platform_topology::topology = NULL;
hwloc_nodeset_t platform_topology::process_node_affinity_mask = NULL;
std::vector<hwloc_cpuset_t> platform_topology::affinity_masks_list;

std::vector<int> platform_topology::numa_indexes_list;
218 
220 
222  // Following vector saves thread affinity mask on scheduler entry to return it to this thread
223  // on scheduler exit.
224  typedef std::vector<platform_topology::affinity_mask> affinity_masks_container;
226 
227 public:
229  for (affinity_masks_container::iterator it = affinity_backup.begin();
230  it != affinity_backup.end(); it++) {
232  }
233  }
234 
236  for (affinity_masks_container::iterator it = affinity_backup.begin();
237  it != affinity_backup.end(); it++) {
239  }
240  }
241 
242  void bind_thread_to_node( unsigned slot_num, unsigned numa_node_id ) {
243  __TBB_ASSERT(slot_num < affinity_backup.size(),
244  "The slot number is greater than the number of slots in the arena");
246  "Trying to get access to uninitialized platform_topology");
248 
251  }
252 
253  void restore_previous_affinity_mask( unsigned slot_num ) {
255  "Trying to get access to uninitialized platform_topology");
257  };
258 
259 };
260 
261 extern "C" { // exported to TBB interfaces
262 
// Parses the machine topology and fills the out-parameters with the NUMA
// node count, the nodes' logical indexes and their default concurrency
// levels. `groups_num` controls whether inter-group binding is allowed
// (see platform_topology::intergroup_binding_allowed). initialize() must
// run before fill(), which asserts the topology has been parsed.
void initialize_numa_topology( size_t groups_num,
                               int& nodes_count, int*& indexes_list, int*& concurrency_list ) {
    platform_topology::initialize(groups_num);
    platform_topology::fill(nodes_count, indexes_list, concurrency_list);
}
268 
270  __TBB_ASSERT(slot_num > 0, "Trying to create numa handler for 0 threads.");
271  return new binding_handler(slot_num);
272 }
273 
275  __TBB_ASSERT(handler_ptr != NULL, "Trying to deallocate NULL pointer.");
276  delete handler_ptr;
277 }
278 
// Bind the thread occupying arena slot `slot_num` to NUMA node `numa_id`.
// NOTE(review): parameters are `int` here while binding_handler's method
// takes `unsigned`; a negative value would be converted implicitly —
// confirm callers never pass negatives.
void bind_to_node(binding_handler* handler_ptr, int slot_num, int numa_id) {
    __TBB_ASSERT(handler_ptr != NULL, "Trying to get access to uninitialized metadata.");
    __TBB_ASSERT(platform_topology::is_topology_parsed(), "Trying to get access "
                 "to uninitialized platform_topology.");
    handler_ptr->bind_thread_to_node(slot_num, numa_id);
}
285 
// Restore the affinity mask that was saved for arena slot `slot_num`
// before the thread was bound to a NUMA node.
// NOTE(review): `slot_num` is `int` here but the handler method takes
// `unsigned`; implicit conversion applies — confirm callers never pass
// negatives.
void restore_affinity(binding_handler* handler_ptr, int slot_num) {
    __TBB_ASSERT(handler_ptr != NULL, "Trying to get access to uninitialized metadata.");
    __TBB_ASSERT(platform_topology::is_topology_parsed(), "Trying to get access "
                 "to uninitialized platform_topology.");
    handler_ptr->restore_previous_affinity_mask(slot_num);
}
292 
293 } // extern "C"
294 
295 } // namespace internal
296 } // namespace tbb
297 
298 #undef assertion_hwloc_wrapper
void bind_thread_to_node(unsigned slot_num, unsigned numa_node_id)
Definition: tbb_bind.cpp:242
static affinity_mask allocate_process_affinity_mask()
Definition: tbb_bind.cpp:182
affinity_masks_container affinity_backup
Definition: tbb_bind.cpp:225
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
static void fill(int &nodes_count, int *&indexes_list, int *&concurrency_list)
Definition: tbb_bind.cpp:175
void restore_previous_affinity_mask(unsigned slot_num)
Definition: tbb_bind.cpp:253
hwloc_const_cpuset_t const_affinity_mask
Definition: tbb_bind.cpp:65
std::vector< platform_topology::affinity_mask > affinity_masks_container
Definition: tbb_bind.cpp:224
static void initialize(size_t groups_num)
Definition: tbb_bind.cpp:69
void bind_to_node(binding_handler *handler_ptr, int slot_num, int numa_id)
Definition: tbb_bind.cpp:279
void initialize_numa_topology(size_t groups_num, int &nodes_count, int *&indexes_list, int *&concurrency_list)
Definition: tbb_bind.cpp:263
static bool intergroup_binding_allowed(size_t groups_num)
Definition: tbb_bind.cpp:61
void deallocate_binding_handler(binding_handler *handler_ptr)
Definition: tbb_bind.cpp:274
static void free_affinity_mask(affinity_mask mask_to_free)
Definition: tbb_bind.cpp:187
static const_affinity_mask get_node_affinity_mask(int node_index)
Definition: tbb_bind.cpp:203
binding_handler * allocate_binding_handler(int slot_num)
Definition: tbb_bind.cpp:269
#define assertion_hwloc_wrapper(command,...)
Definition: tbb_bind.cpp:33
static void set_new_affinity_mask(const_affinity_mask new_mask)
Definition: tbb_bind.cpp:199
static void store_current_affinity_mask(affinity_mask current_mask)
Definition: tbb_bind.cpp:191
static hwloc_nodeset_t process_node_affinity_mask
Definition: tbb_bind.cpp:47
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
void restore_affinity(binding_handler *handler_ptr, int slot_num)
Definition: tbb_bind.cpp:286
static std::vector< int > numa_indexes_list
Definition: tbb_bind.cpp:51
static hwloc_topology_t topology
Definition: tbb_bind.cpp:45
The graph class.
static hwloc_cpuset_t process_cpu_affinity_mask
Definition: tbb_bind.cpp:46
static std::vector< int > default_concurrency_list
Definition: tbb_bind.cpp:50
static std::vector< hwloc_cpuset_t > affinity_masks_list
Definition: tbb_bind.cpp:48
static init_stages initialization_state
Definition: tbb_bind.cpp:55

Copyright © 2005-2020 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.