partitioner.C
Go to the documentation of this file.
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2018 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 
20 // Local Includes
21 #include "libmesh/elem.h"
22 #include "libmesh/mesh_base.h"
23 #include "libmesh/parallel.h"
24 #include "libmesh/parallel_sync.h"
25 #include "libmesh/partitioner.h"
26 #include "libmesh/mesh_tools.h"
30 
31 #ifdef LIBMESH_HAVE_PETSC
33 #include "petscmat.h"
35 #endif
36 
37 namespace libMesh
38 {
39 
40 
41 
42 // ------------------------------------------------------------
43 // Partitioner static data
45 
46 
47 
48 // ------------------------------------------------------------
49 // Partitioner implementation
51 {
52  this->partition(mesh,mesh.n_processors());
53 }
54 
55 
56 
58  const unsigned int n)
59 {
60  libmesh_parallel_only(mesh.comm());
61 
62  // BSK - temporary fix while redistribution is integrated 6/26/2008
63  // Uncomment this to not repartition in parallel
64  // if (!mesh.is_serial())
65  // return;
66 
67  // we cannot partition into more pieces than we have
68  // active elements!
69  const unsigned int n_parts =
70  static_cast<unsigned int>
71  (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));
72 
73  // Set the number of partitions in the mesh
74  mesh.set_n_partitions()=n_parts;
75 
76  if (n_parts == 1)
77  {
78  this->single_partition (mesh);
79  return;
80  }
81 
82  // First assign a temporary partitioning to any unpartitioned elements
84 
85  // Call the partitioning function
86  this->_do_partition(mesh,n_parts);
87 
88  // Set the parent's processor ids
90 
91  // Redistribute elements if necessary, before setting node processor
92  // ids, to make sure those will be set consistently
93  mesh.redistribute();
94 
95 #ifdef DEBUG
97 
98  // Messed up elem processor_id()s can leave us without the child
99  // elements we need to restrict vectors on a distributed mesh
100  MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
101 #endif
102 
103  // Set the node's processor ids
105 
106 #ifdef DEBUG
107  MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
108 #endif
109 
110  // Give derived Mesh classes a chance to update any cached data to
111  // reflect the new partitioning
112  mesh.update_post_partitioning();
113 }
114 
115 
116 
118 {
119  this->repartition(mesh,mesh.n_processors());
120 }
121 
122 
123 
125  const unsigned int n)
126 {
127  // we cannot partition into more pieces than we have
128  // active elements!
129  const unsigned int n_parts =
130  static_cast<unsigned int>
131  (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));
132 
133  // Set the number of partitions in the mesh
134  mesh.set_n_partitions()=n_parts;
135 
136  if (n_parts == 1)
137  {
138  this->single_partition (mesh);
139  return;
140  }
141 
142  // First assign a temporary partitioning to any unpartitioned elements
144 
145  // Call the partitioning function
146  this->_do_repartition(mesh,n_parts);
147 
148  // Set the parent's processor ids
150 
151  // Set the node's processor ids
153 }
154 
155 
156 
157 
158 
160 {
162  mesh.elements_end());
163 
164  // Redistribute, in case someone (like our unit tests) is doing
165  // something silly (like moving a whole already-distributed mesh
166  // back onto rank 0).
167  mesh.redistribute();
168 }
169 
170 
171 
174 {
175  LOG_SCOPE("single_partition_range()", "Partitioner");
176 
177  for (auto & elem : as_range(it, end))
178  {
179  elem->processor_id() = 0;
180 
181  // Assign all this element's nodes to processor 0 as well.
182  for (unsigned int n=0; n<elem->n_nodes(); ++n)
183  elem->node_ptr(n)->processor_id() = 0;
184  }
185 }
186 
188 {
190 }
191 
192 
193 
195  const unsigned int n_subdomains)
196 {
197  MeshBase::element_iterator it = mesh.unpartitioned_elements_begin();
198  const MeshBase::element_iterator end = mesh.unpartitioned_elements_end();
199 
200  const dof_id_type n_unpartitioned_elements = MeshTools::n_elem (it, end);
201 
202  // the unpartitioned elements must exist on all processors. If the range is empty on one
203  // it is empty on all, and we can quit right here.
204  if (!n_unpartitioned_elements)
205  return;
206 
207  // find the target subdomain sizes
208  std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());
209 
210  for (processor_id_type pid=0; pid<mesh.n_processors(); pid++)
211  {
212  dof_id_type tgt_subdomain_size = 0;
213 
214  // watch out for the case that n_subdomains < n_processors
215  if (pid < n_subdomains)
216  {
217  tgt_subdomain_size = n_unpartitioned_elements/n_subdomains;
218 
219  if (pid < n_unpartitioned_elements%n_subdomains)
220  tgt_subdomain_size++;
221 
222  }
223 
224  //libMesh::out << "pid, #= " << pid << ", " << tgt_subdomain_size << std::endl;
225  if (pid == 0)
226  subdomain_bounds[0] = tgt_subdomain_size;
227  else
228  subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
229  }
230 
231  libmesh_assert_equal_to (subdomain_bounds.back(), n_unpartitioned_elements);
232 
233  // create the unique mapping for all unpartitioned elements independent of partitioning
234  // determine the global indexing for all the unpartitioned elements
235  std::vector<dof_id_type> global_indices;
236 
237  // Calling this on all processors a unique range in [0,n_unpartitioned_elements) is constructed.
238  // Only the indices for the elements we pass in are returned in the array.
241  global_indices);
242 
243  dof_id_type cnt=0;
244  for (auto & elem : as_range(it, end))
245  {
246  libmesh_assert_less (cnt, global_indices.size());
247  const dof_id_type global_index =
248  global_indices[cnt++];
249 
250  libmesh_assert_less (global_index, subdomain_bounds.back());
251  libmesh_assert_less (global_index, n_unpartitioned_elements);
252 
253  const processor_id_type subdomain_id =
254  cast_int<processor_id_type>
255  (std::distance(subdomain_bounds.begin(),
256  std::upper_bound(subdomain_bounds.begin(),
257  subdomain_bounds.end(),
258  global_index)));
259  libmesh_assert_less (subdomain_id, n_subdomains);
260 
261  elem->processor_id() = subdomain_id;
262  //libMesh::out << "assigning " << global_index << " to " << subdomain_id << std::endl;
263  }
264 }
265 
266 
267 
269 {
270  // Ignore the parameter when !LIBMESH_ENABLE_AMR
272 
273  LOG_SCOPE("set_parent_processor_ids()", "Partitioner");
274 
275 #ifdef LIBMESH_ENABLE_AMR
276 
277  // If the mesh is serial we have access to all the elements,
278  // in particular all the active ones. We can therefore set
279  // the parent processor ids indirectly through their children, and
280  // set the subactive processor ids while examining their active
281  // ancestors.
282  // By convention a parent is assigned to the minimum processor
283  // of all its children, and a subactive is assigned to the processor
284  // of its active ancestor.
285  if (mesh.is_serial())
286  {
287  for (auto & elem : mesh.active_element_ptr_range())
288  {
289  // First set descendents
290  std::vector<const Elem *> subactive_family;
291  elem->total_family_tree(subactive_family);
292  for (std::size_t i = 0; i != subactive_family.size(); ++i)
293  const_cast<Elem *>(subactive_family[i])->processor_id() = elem->processor_id();
294 
295  // Then set ancestors
296  Elem * parent = elem->parent();
297 
298  while (parent)
299  {
300  // invalidate the parent id, otherwise the min below
301  // will not work if the current parent id is less
302  // than all the children!
303  parent->invalidate_processor_id();
304 
305  for (auto & child : parent->child_ref_range())
306  {
307  libmesh_assert(!child.is_remote());
308  libmesh_assert_not_equal_to (child.processor_id(), DofObject::invalid_processor_id);
309  parent->processor_id() = std::min(parent->processor_id(),
310  child.processor_id());
311  }
312  parent = parent->parent();
313  }
314  }
315  }
316 
317  // When the mesh is parallel we cannot guarantee that parents have access to
318  // all their children.
319  else
320  {
321  // Setting subactive processor ids is easy: we can guarantee
322  // that children have access to all their parents.
323 
324  // Loop over all the active elements in the mesh
325  for (auto & child : mesh.active_element_ptr_range())
326  {
327  std::vector<const Elem *> subactive_family;
328  child->total_family_tree(subactive_family);
329  for (std::size_t i = 0; i != subactive_family.size(); ++i)
330  const_cast<Elem *>(subactive_family[i])->processor_id() = child->processor_id();
331  }
332 
333  // When the mesh is parallel we cannot guarantee that parents have access to
334  // all their children.
335 
336  // We will use a brute-force approach here. Each processor finds its parent
337  // elements and sets the parent pid to the minimum of its
338  // semilocal descendants.
339  // A global reduction is then performed to make sure the true minimum is found.
340  // As noted, this is required because we cannot guarantee that a parent has
341  // access to all its children on any single processor.
342  libmesh_parallel_only(mesh.comm());
343  libmesh_assert(MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
344  mesh.unpartitioned_elements_end()) == 0);
345 
346  const dof_id_type max_elem_id = mesh.max_elem_id();
347 
348  std::vector<processor_id_type>
349  parent_processor_ids (std::min(communication_blocksize,
350  max_elem_id));
351 
352  for (dof_id_type blk=0, last_elem_id=0; last_elem_id<max_elem_id; blk++)
353  {
354  last_elem_id =
355  std::min(static_cast<dof_id_type>((blk+1)*communication_blocksize),
356  max_elem_id);
357  const dof_id_type first_elem_id = blk*communication_blocksize;
358 
359  std::fill (parent_processor_ids.begin(),
360  parent_processor_ids.end(),
362 
363  // first build up local contributions to parent_processor_ids
364  bool have_parent_in_block = false;
365 
366  for (auto & parent : as_range(mesh.ancestor_elements_begin(),
367  mesh.ancestor_elements_end()))
368  {
369  const dof_id_type parent_idx = parent->id();
370  libmesh_assert_less (parent_idx, max_elem_id);
371 
372  if ((parent_idx >= first_elem_id) &&
373  (parent_idx < last_elem_id))
374  {
375  have_parent_in_block = true;
377 
378  std::vector<const Elem *> active_family;
379  parent->active_family_tree(active_family);
380  for (std::size_t i = 0; i != active_family.size(); ++i)
381  parent_pid = std::min (parent_pid, active_family[i]->processor_id());
382 
383  const dof_id_type packed_idx = parent_idx - first_elem_id;
384  libmesh_assert_less (packed_idx, parent_processor_ids.size());
385 
386  parent_processor_ids[packed_idx] = parent_pid;
387  }
388  }
389 
390  // then find the global minimum
391  mesh.comm().min (parent_processor_ids);
392 
393  // and assign the ids, if we have a parent in this block.
394  if (have_parent_in_block)
395  for (auto & parent : as_range(mesh.ancestor_elements_begin(),
396  mesh.ancestor_elements_end()))
397  {
398  const dof_id_type parent_idx = parent->id();
399 
400  if ((parent_idx >= first_elem_id) &&
401  (parent_idx < last_elem_id))
402  {
403  const dof_id_type packed_idx = parent_idx - first_elem_id;
404  libmesh_assert_less (packed_idx, parent_processor_ids.size());
405 
406  const processor_id_type parent_pid =
407  parent_processor_ids[packed_idx];
408 
409  libmesh_assert_not_equal_to (parent_pid, DofObject::invalid_processor_id);
410 
411  parent->processor_id() = parent_pid;
412  }
413  }
414  }
415  }
416 
417 #endif // LIBMESH_ENABLE_AMR
418 }
419 
420 void
422  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> & processor_pair_to_nodes)
423 {
424  // This function must be run on all processors at once
425  libmesh_parallel_only(mesh.comm());
426 
427  processor_pair_to_nodes.clear();
428 
429  std::set<dof_id_type> mynodes;
430  std::set<dof_id_type> neighbor_nodes;
431  std::vector<dof_id_type> common_nodes;
432 
433  // Loop over all the active elements
434  for (auto & elem : mesh.active_element_ptr_range())
435  {
436  libmesh_assert(elem);
437 
438  libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);
439 
440  auto n_nodes = elem->n_nodes();
441 
442  // prepare data for this element
443  mynodes.clear();
444  neighbor_nodes.clear();
445  common_nodes.clear();
446 
447  for (unsigned int inode = 0; inode < n_nodes; inode++)
448  mynodes.insert(elem->node_id(inode));
449 
450  for (auto i : elem->side_index_range())
451  {
452  auto neigh = elem->neighbor_ptr(i);
453  if (neigh && !neigh->is_remote() && neigh->processor_id() != elem->processor_id())
454  {
455  neighbor_nodes.clear();
456  common_nodes.clear();
457  auto neigh_n_nodes = neigh->n_nodes();
458  for (unsigned int inode = 0; inode < neigh_n_nodes; inode++)
459  neighbor_nodes.insert(neigh->node_id(inode));
460 
461  std::set_intersection(mynodes.begin(), mynodes.end(),
462  neighbor_nodes.begin(), neighbor_nodes.end(),
463  std::back_inserter(common_nodes));
464 
465  auto & map_set = processor_pair_to_nodes[std::make_pair(std::min(elem->processor_id(), neigh->processor_id()),
466  std::max(elem->processor_id(), neigh->processor_id()))];
467  for (auto global_node_id : common_nodes)
468  map_set.insert(global_node_id);
469  }
470  }
471  }
472 }
473 
475 {
476  // This function must be run on all processors at once
477  libmesh_parallel_only(mesh.comm());
478 
479  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
480 
481  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
482 
483  for (auto & pmap : processor_pair_to_nodes)
484  {
485  std::size_t n_own_nodes = pmap.second.size()/2, i = 0;
486 
487  for (auto it = pmap.second.begin(); it != pmap.second.end(); it++, i++)
488  {
489  auto & node = mesh.node_ref(*it);
490  if (i <= n_own_nodes)
491  node.processor_id() = pmap.first.first;
492  else
493  node.processor_id() = pmap.first.second;
494  }
495  }
496 }
497 
499 {
500  // This function must be run on all processors at once
501  libmesh_parallel_only(mesh.comm());
502 
503  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
504 
505  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
506 
507  std::unordered_map<dof_id_type, std::vector<const Elem *>> nodes_to_elem_map;
508 
509  MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);
510 
511  std::vector<const Node *> neighbors;
512  std::set<dof_id_type> neighbors_order;
513  std::vector<dof_id_type> common_nodes;
514  std::queue<dof_id_type> nodes_queue;
515  std::set<dof_id_type> visted_nodes;
516 
517  for (auto & pmap : processor_pair_to_nodes)
518  {
519  std::size_t n_own_nodes = pmap.second.size()/2;
520 
521  // Initialize node assignment
522  for (auto it = pmap.second.begin(); it != pmap.second.end(); it++)
523  mesh.node_ref(*it).processor_id() = pmap.first.second;
524 
525  visted_nodes.clear();
526  for (auto it = pmap.second.begin(); it != pmap.second.end(); it++)
527  {
528  mesh.node_ref(*it).processor_id() = pmap.first.second;
529 
530  if (visted_nodes.find(*it) != visted_nodes.end())
531  continue;
532  else
533  {
534  nodes_queue.push(*it);
535  visted_nodes.insert(*it);
536  if (visted_nodes.size() >= n_own_nodes)
537  break;
538  }
539 
540  while (!nodes_queue.empty())
541  {
542  auto & node = mesh.node_ref(nodes_queue.front());
543  nodes_queue.pop();
544 
545  neighbors.clear();
546  MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
547  neighbors_order.clear();
548  for (auto & neighbor : neighbors)
549  neighbors_order.insert(neighbor->id());
550 
551  common_nodes.clear();
552  std::set_intersection(pmap.second.begin(), pmap.second.end(),
553  neighbors_order.begin(), neighbors_order.end(),
554  std::back_inserter(common_nodes));
555 
556  for (auto c_node : common_nodes)
557  if (visted_nodes.find(c_node) == visted_nodes.end())
558  {
559  nodes_queue.push(c_node);
560  visted_nodes.insert(c_node);
561  if (visted_nodes.size() >= n_own_nodes)
562  goto queue_done;
563  }
564 
565  if (visted_nodes.size() >= n_own_nodes)
566  goto queue_done;
567  }
568  }
569  queue_done:
570  for (auto node : visted_nodes)
571  mesh.node_ref(node).processor_id() = pmap.first.first;
572  }
573 }
574 
576 {
577  libmesh_ignore(mesh); // Only used if LIBMESH_HAVE_PETSC
578 
579  // This function must be run on all processors at once
580  libmesh_parallel_only(mesh.comm());
581 
582 #if LIBMESH_HAVE_PETSC
583  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
584 
585  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
586 
587  std::vector<std::vector<const Elem *>> nodes_to_elem_map;
588 
589  MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);
590 
591  std::vector<const Node *> neighbors;
592  std::set<dof_id_type> neighbors_order;
593  std::vector<dof_id_type> common_nodes;
594 
595  std::vector<dof_id_type> rows;
596  std::vector<dof_id_type> cols;
597 
598  std::map<dof_id_type, dof_id_type> global_to_local;
599 
600  for (auto & pmap : processor_pair_to_nodes)
601  {
602  unsigned int i = 0;
603 
604  rows.clear();
605  rows.resize(pmap.second.size()+1);
606  cols.clear();
607  for (auto it = pmap.second.begin(); it != pmap.second.end(); it++)
608  global_to_local[*it] = i++;
609 
610  i = 0;
611  for (auto it = pmap.second.begin(); it != pmap.second.end(); it++, i++)
612  {
613  auto & node = mesh.node_ref(*it);
614  neighbors.clear();
615  MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
616  neighbors_order.clear();
617  for (auto & neighbor : neighbors)
618  neighbors_order.insert(neighbor->id());
619 
620  common_nodes.clear();
621  std::set_intersection(pmap.second.begin(), pmap.second.end(),
622  neighbors_order.begin(), neighbors_order.end(),
623  std::back_inserter(common_nodes));
624 
625  rows[i+1] = rows[i] + cast_int<dof_id_type>(common_nodes.size());
626 
627  for (auto c_node : common_nodes)
628  cols.push_back(global_to_local[c_node]);
629  }
630 
631  Mat adj;
632  MatPartitioning part;
633  IS is;
634  PetscInt local_size, rows_size, cols_size;
635  PetscInt *adj_i, *adj_j;
636  const PetscInt *indices;
637  PetscCalloc1(rows.size(), &adj_i);
638  PetscCalloc1(cols.size(), &adj_j);
639  rows_size = cast_int<PetscInt>(rows.size());
640  for (PetscInt ii=0; ii<rows_size; ii++)
641  adj_i[ii] = rows[ii];
642 
643  cols_size = cast_int<PetscInt>(cols.size());
644  for (PetscInt ii=0; ii<cols_size; ii++)
645  adj_j[ii] = cols[ii];
646 
647  const PetscInt sz = cast_int<PetscInt>(pmap.second.size());
648  MatCreateMPIAdj(PETSC_COMM_SELF, sz, sz, adj_i, adj_j,nullptr,&adj);
649  MatPartitioningCreate(PETSC_COMM_SELF,&part);
650  MatPartitioningSetAdjacency(part,adj);
651  MatPartitioningSetNParts(part,2);
652  PetscObjectSetOptionsPrefix((PetscObject)part, "balance_");
653  MatPartitioningSetFromOptions(part);
654  MatPartitioningApply(part,&is);
655 
656  MatDestroy(&adj);
657  MatPartitioningDestroy(&part);
658 
659  ISGetLocalSize(is, &local_size);
660  ISGetIndices(is, &indices);
661  i = 0;
662  for (auto it = pmap.second.begin(); it != pmap.second.end(); it++, i++)
663  {
664  auto & node = mesh.node_ref(*it);
665  if (indices[i])
666  node.processor_id() = pmap.first.second;
667  else
668  node.processor_id() = pmap.first.first;
669  }
670  ISRestoreIndices(is, &indices);
671  ISDestroy(&is);
672  }
673 #else
674  libmesh_error_msg("PETSc is required");
675 #endif
676 }
677 
678 
680 {
681  LOG_SCOPE("set_node_processor_ids()","Partitioner");
682 
683  // This function must be run on all processors at once
684  libmesh_parallel_only(mesh.comm());
685 
686  // If we have any unpartitioned elements at this
687  // stage there is a problem
688  libmesh_assert (MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
689  mesh.unpartitioned_elements_end()) == 0);
690 
691 
692  // const dof_id_type orig_n_local_nodes = mesh.n_local_nodes();
693 
694  // libMesh::err << "[" << mesh.processor_id() << "]: orig_n_local_nodes="
695  // << orig_n_local_nodes << std::endl;
696 
697  // Build up request sets. Each node is currently owned by a processor because
698  // it is connected to an element owned by that processor. However, during the
699  // repartitioning phase that element may have been assigned a new processor id, but
700  // it is still resident on the original processor. We need to know where to look
701  // for new ids before assigning new ids, otherwise we may be asking the wrong processors
702  // for the wrong information.
703  //
704  // The only remaining issue is what to do with unpartitioned nodes. Since they are required
705  // to live on all processors we can simply rely on ourselves to number them properly.
706  std::map<processor_id_type, std::vector<dof_id_type>>
707  requested_node_ids;
708 
709  // Loop over all the nodes, count the ones on each processor. We can skip ourself
710  std::vector<dof_id_type> ghost_nodes_from_proc(mesh.n_processors(), 0);
711 
712  for (auto & node : mesh.node_ptr_range())
713  {
714  libmesh_assert(node);
715  const processor_id_type current_pid = node->processor_id();
716  if (current_pid != mesh.processor_id() &&
717  current_pid != DofObject::invalid_processor_id)
718  {
719  libmesh_assert_less (current_pid, ghost_nodes_from_proc.size());
720  ghost_nodes_from_proc[current_pid]++;
721  }
722  }
723 
724  // We know how many objects live on each processor, so reserve()
725  // space for each.
726  for (processor_id_type pid=0; pid != mesh.n_processors(); ++pid)
727  if (ghost_nodes_from_proc[pid])
728  requested_node_ids[pid].reserve(ghost_nodes_from_proc[pid]);
729 
730  // We need to get the new pid for each node from the processor
731  // which *currently* owns the node. We can safely skip ourself
732  for (auto & node : mesh.node_ptr_range())
733  {
734  libmesh_assert(node);
735  const processor_id_type current_pid = node->processor_id();
736  if (current_pid != mesh.processor_id() &&
737  current_pid != DofObject::invalid_processor_id)
738  {
739  libmesh_assert_less (requested_node_ids[current_pid].size(),
740  ghost_nodes_from_proc[current_pid]);
741  requested_node_ids[current_pid].push_back(node->id());
742  }
743 
744  // Unset any previously-set node processor ids
745  node->invalidate_processor_id();
746  }
747 
748  // Loop over all the active elements
749  for (auto & elem : mesh.active_element_ptr_range())
750  {
751  libmesh_assert(elem);
752 
753  libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);
754 
755  // Consider updating the processor id on this element's nodes
756  for (unsigned int n=0; n<elem->n_nodes(); ++n)
757  {
758  Node & node = elem->node_ref(n);
759  processor_id_type & pid = node.processor_id();
760  pid = node.choose_processor_id(pid, elem->processor_id());
761  }
762  }
763 
764  bool load_balanced_nodes_linear =
765  libMesh::on_command_line ("--load-balanced-nodes-linear");
766 
767  if (load_balanced_nodes_linear)
769 
770  bool load_balanced_nodes_bfs =
771  libMesh::on_command_line ("--load-balanced-nodes-bfs");
772 
773  if (load_balanced_nodes_bfs)
775 
776  bool load_balanced_nodes_petscpartition =
777  libMesh::on_command_line ("--load_balanced_nodes_petscpartitioner");
778 
779  if (load_balanced_nodes_petscpartition)
781 
782  // And loop over the subactive elements, but don't reassign
783  // nodes that are already active on another processor.
784  for (auto & elem : as_range(mesh.subactive_elements_begin(),
785  mesh.subactive_elements_end()))
786  {
787  libmesh_assert(elem);
788 
789  libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);
790 
791  for (unsigned int n=0; n<elem->n_nodes(); ++n)
792  if (elem->node_ptr(n)->processor_id() == DofObject::invalid_processor_id)
793  elem->node_ptr(n)->processor_id() = elem->processor_id();
794  }
795 
796  // Same for the inactive elements -- we will have already gotten most of these
797  // nodes, *except* for the case of a parent with a subset of children which are
798  // ghost elements. In that case some of the parent nodes will not have been
799  // properly handled yet
800  for (auto & elem : as_range(mesh.not_active_elements_begin(),
801  mesh.not_active_elements_end()))
802  {
803  libmesh_assert(elem);
804 
805  libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);
806 
807  for (unsigned int n=0; n<elem->n_nodes(); ++n)
808  if (elem->node_ptr(n)->processor_id() == DofObject::invalid_processor_id)
809  elem->node_ptr(n)->processor_id() = elem->processor_id();
810  }
811 
812  // We can't assert that all nodes are connected to elements, because
813  // a DistributedMesh with NodeConstraints might have pulled in some
814  // remote nodes solely for evaluating those constraints.
815  // MeshTools::libmesh_assert_connected_nodes(mesh);
816 
817  // For such nodes, we'll do a sanity check later when making sure
818  // that we successfully reset their processor ids to something
819  // valid.
820 
821  auto gather_functor =
822  [& mesh]
823  (processor_id_type, const std::vector<dof_id_type> & ids,
824  std::vector<processor_id_type> & new_pids)
825  {
826  const std::size_t ids_size = ids.size();
827  new_pids.resize(ids_size);
828 
829  // Fill those requests in-place
830  for (std::size_t i=0; i != ids_size; ++i)
831  {
832  Node & node = mesh.node_ref(ids[i]);
833  const processor_id_type new_pid = node.processor_id();
834 
835  // We may have an invalid processor_id() on nodes that have been
836  // "detached" from coarsened-away elements but that have not yet
837  // themselves been removed.
838  // libmesh_assert_not_equal_to (new_pid, DofObject::invalid_processor_id);
839  // libmesh_assert_less (new_pid, mesh.n_partitions()); // this is the correct test --
840  new_pids[i] = new_pid; // the number of partitions may
841  } // not equal the number of processors
842  };
843 
844  auto action_functor =
845  [& mesh]
847  const std::vector<dof_id_type> & ids,
848  const std::vector<processor_id_type> & new_pids)
849  {
850  const std::size_t ids_size = ids.size();
851  // Copy the pid changes we've now been informed of
852  for (std::size_t i=0; i != ids_size; ++i)
853  {
854  Node & node = mesh.node_ref(ids[i]);
855 
856  // this is the correct test -- the number of partitions may
857  // not equal the number of processors
858 
859  // But: we may have an invalid processor_id() on nodes that
860  // have been "detached" from coarsened-away elements but
861  // that have not yet themselves been removed.
862  // libmesh_assert_less (filled_request[i], mesh.n_partitions());
863 
864  node.processor_id(new_pids[i]);
865  }
866  };
867 
868  const processor_id_type * ex = nullptr;
870  (mesh.comm(), requested_node_ids, gather_functor, action_functor, ex);
871 
872 #ifdef DEBUG
873  MeshTools::libmesh_assert_valid_procids<Node>(mesh);
874  //MeshTools::libmesh_assert_canonical_node_procids(mesh);
875 #endif
876 }
877 
878 
879 
881 {
883 
884  typedef std::unordered_map<dof_id_type, dof_id_type> map_type;
885 
886  SyncLocalIDs(map_type & _id_map) : id_map(_id_map) {}
887 
889 
890  void gather_data (const std::vector<dof_id_type> & ids,
891  std::vector<datum> & local_ids) const
892  {
893  local_ids.resize(ids.size());
894 
895  for (std::size_t i=0, imax = ids.size(); i != imax; ++i)
896  local_ids[i] = id_map[ids[i]];
897  }
898 
899  void act_on_data (const std::vector<dof_id_type> & ids,
900  const std::vector<datum> & local_ids)
901  {
902  for (std::size_t i=0, imax = local_ids.size(); i != imax; ++i)
903  id_map[ids[i]] = local_ids[i];
904  }
905 };
906 
908 {
909  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
910 
911  // Find the number of active elements on each processor. We cannot use
912  // mesh.n_active_elem_on_proc(pid) since that only returns the number of
913  // elements assigned to pid which are currently stored on the calling
914  // processor. This will not in general be correct for parallel meshes
915  // when (pid!=mesh.processor_id()).
916  _n_active_elem_on_proc.resize(mesh.n_processors());
917  mesh.comm().allgather(n_active_local_elem, _n_active_elem_on_proc);
918 
919  libMesh::BoundingBox bbox =
921 
922  _global_index_by_pid_map.clear();
923 
924  // create the mapping which is contiguous by processor
926  mesh.active_local_elements_begin(),
927  mesh.active_local_elements_end(),
929 
931 
933  (mesh.comm(), mesh.active_elements_begin(), mesh.active_elements_end(), sync);
934 
935  dof_id_type pid_offset=0;
936  for (processor_id_type pid=0; pid<mesh.n_processors(); pid++)
937  {
938  for (const auto & elem : as_range(mesh.active_pid_elements_begin(pid),
939  mesh.active_pid_elements_end(pid)))
940  {
941  libmesh_assert_less (_global_index_by_pid_map[elem->id()], _n_active_elem_on_proc[pid]);
942 
943  _global_index_by_pid_map[elem->id()] += pid_offset;
944  }
945 
946  pid_offset += _n_active_elem_on_proc[pid];
947  }
948 }
949 
951 {
952  LOG_SCOPE("build_graph()", "ParmetisPartitioner");
953 
954  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
955  // If we have boundary elements in this mesh, we want to account for
956  // the connectivity between them and interior elements. We can find
957  // interior elements from boundary elements, but we need to build up
958  // a lookup map to do the reverse.
959  typedef std::unordered_multimap<const Elem *, const Elem *> map_type;
960  map_type interior_to_boundary_map;
961 
962  for (const auto & elem : mesh.active_element_ptr_range())
963  {
964  // If we don't have an interior_parent then there's nothing to look us
965  // up.
966  if ((elem->dim() >= LIBMESH_DIM) ||
967  !elem->interior_parent())
968  continue;
969 
970  // get all relevant interior elements
971  std::set<const Elem *> neighbor_set;
972  elem->find_interior_neighbors(neighbor_set);
973 
974  for (const auto & neighbor : neighbor_set)
975  interior_to_boundary_map.insert(std::make_pair(neighbor, elem));
976  }
977 
978 #ifdef LIBMESH_ENABLE_AMR
979  std::vector<const Elem *> neighbors_offspring;
980 #endif
981 
982  // This is costly, and we only need to do it if the mesh has
983  // changed since we last partitioned... but the mesh probably has
984  // changed since we last partitioned, and if it hasn't we don't
985  // have a reliable way to be sure of that.
987 
988  dof_id_type first_local_elem = 0;
989  for (processor_id_type pid=0; pid < mesh.processor_id(); pid++)
990  first_local_elem += _n_active_elem_on_proc[pid];
991 
992  _dual_graph.clear();
993  _dual_graph.resize(n_active_local_elem);
994  _local_id_to_elem.resize(n_active_local_elem);
995 
996  for (const auto & elem : mesh.active_local_element_ptr_range())
997  {
998  libmesh_assert (_global_index_by_pid_map.count(elem->id()));
999  const dof_id_type global_index_by_pid =
1000  _global_index_by_pid_map[elem->id()];
1001 
1002  const dof_id_type local_index =
1003  global_index_by_pid - first_local_elem;
1004  libmesh_assert_less (local_index, n_active_local_elem);
1005 
1006  std::vector<dof_id_type> & graph_row = _dual_graph[local_index];
1007 
1008  // Save this off to make it easy to index later
1009  _local_id_to_elem[local_index] = elem;
1010 
1011  // Loop over the element's neighbors. An element
1012  // adjacency corresponds to a face neighbor
1013  for (auto neighbor : elem->neighbor_ptr_range())
1014  {
1015  if (neighbor != nullptr)
1016  {
1017  // If the neighbor is active treat it
1018  // as a connection
1019  if (neighbor->active())
1020  {
1021  libmesh_assert(_global_index_by_pid_map.count(neighbor->id()));
1022  const dof_id_type neighbor_global_index_by_pid =
1023  _global_index_by_pid_map[neighbor->id()];
1024 
1025  graph_row.push_back(neighbor_global_index_by_pid);
1026  }
1027 
1028 #ifdef LIBMESH_ENABLE_AMR
1029 
1030  // Otherwise we need to find all of the
1031  // neighbor's children that are connected to
1032  // us and add them
1033  else
1034  {
1035  // The side of the neighbor to which
1036  // we are connected
1037  const unsigned int ns =
1038  neighbor->which_neighbor_am_i (elem);
1039  libmesh_assert_less (ns, neighbor->n_neighbors());
1040 
1041  // Get all the active children (& grandchildren, etc...)
1042  // of the neighbor
1043 
1044  // FIXME - this is the wrong thing, since we
1045  // should be getting the active family tree on
1046  // our side only. But adding too many graph
1047  // links may cause hanging nodes to tend to be
1048  // on partition interiors, which would reduce
1049  // communication overhead for constraint
1050  // equations, so we'll leave it.
1051 
1052  neighbor->active_family_tree (neighbors_offspring);
1053 
1054  // Get all the neighbor's children that
1055  // live on that side and are thus connected
1056  // to us
1057  for (std::size_t nc=0; nc<neighbors_offspring.size(); nc++)
1058  {
1059  const Elem * child =
1060  neighbors_offspring[nc];
1061 
1062  // This does not assume a level-1 mesh.
1063  // Note that since children have sides numbered
1064  // coincident with the parent then this is a sufficient test.
1065  if (child->neighbor_ptr(ns) == elem)
1066  {
1067  libmesh_assert (child->active());
1068  libmesh_assert (_global_index_by_pid_map.count(child->id()));
1069  const dof_id_type child_global_index_by_pid =
1070  _global_index_by_pid_map[child->id()];
1071 
1072  graph_row.push_back(child_global_index_by_pid);
1073  }
1074  }
1075  }
1076 
1077 #endif /* ifdef LIBMESH_ENABLE_AMR */
1078 
1079 
1080  }
1081  }
1082 
1083  if ((elem->dim() < LIBMESH_DIM) &&
1084  elem->interior_parent())
1085  {
1086  // get all relevant interior elements
1087  std::set<const Elem *> neighbor_set;
1088  elem->find_interior_neighbors(neighbor_set);
1089 
1090  for (const auto & neighbor : neighbor_set)
1091  {
1092  const dof_id_type neighbor_global_index_by_pid =
1093  _global_index_by_pid_map[neighbor->id()];
1094 
1095  graph_row.push_back(neighbor_global_index_by_pid);
1096  }
1097  }
1098 
1099  // Check for any boundary neighbors
1100  for (const auto & pr : as_range(interior_to_boundary_map.equal_range(elem)))
1101  {
1102  const Elem * neighbor = pr.second;
1103 
1104  const dof_id_type neighbor_global_index_by_pid =
1105  _global_index_by_pid_map[neighbor->id()];
1106 
1107  graph_row.push_back(neighbor_global_index_by_pid);
1108  }
1109  }
1110 
1111 }
1112 
1113 void Partitioner::assign_partitioning (const MeshBase & mesh, const std::vector<dof_id_type> & parts)
1114 {
1115  LOG_SCOPE("assign_partitioning()", "ParmetisPartitioner");
1116 
1117  // This function must be run on all processors at once
1118  libmesh_parallel_only(mesh.comm());
1119 
1120  dof_id_type first_local_elem = 0;
1121  for (processor_id_type pid=0; pid < mesh.processor_id(); pid++)
1122  first_local_elem += _n_active_elem_on_proc[pid];
1123 
1124 #ifndef NDEBUG
1125  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
1126 #endif
1127 
1128  std::map<processor_id_type, std::vector<dof_id_type>>
1129  requested_ids;
1130 
1131  // Results to gather from each processor - kept in a map so we
1132  // do only one loop over elements after all receives are done.
1133  std::map<processor_id_type, std::vector<processor_id_type>>
1134  filled_request;
1135 
1136  for (auto & elem : mesh.active_element_ptr_range())
1137  {
1138  // we need to get the index from the owning processor
1139  // (note we cannot assign it now -- we are iterating
1140  // over elements again and this will be bad!)
1141  requested_ids[elem->processor_id()].push_back(elem->id());
1142  }
1143 
1144  auto gather_functor =
1145  [this,
1146  & parts,
1147 #ifndef NDEBUG
1148  & mesh,
1149  n_active_local_elem,
1150 #endif
1151  first_local_elem]
1152  (processor_id_type, const std::vector<dof_id_type> & ids,
1153  std::vector<processor_id_type> & data)
1154  {
1155  const std::size_t ids_size = ids.size();
1156  data.resize(ids.size());
1157 
1158  for (std::size_t i=0; i != ids_size; i++)
1159  {
1160  const dof_id_type requested_elem_index = ids[i];
1161 
1162  libmesh_assert(_global_index_by_pid_map.count(requested_elem_index));
1163 
1164  const dof_id_type global_index_by_pid =
1165  _global_index_by_pid_map[requested_elem_index];
1166 
1167  const dof_id_type local_index =
1168  global_index_by_pid - first_local_elem;
1169 
1170  libmesh_assert_less (local_index, parts.size());
1171  libmesh_assert_less (local_index, n_active_local_elem);
1172 
1173  const processor_id_type elem_procid =
1174  cast_int<processor_id_type>(parts[local_index]);
1175 
1176  libmesh_assert_less (elem_procid, mesh.n_partitions());
1177 
1178  data[i] = elem_procid;
1179  }
1180  };
1181 
1182  auto action_functor =
1183  [&filled_request]
1184  (processor_id_type pid,
1185  const std::vector<dof_id_type> &,
1186  const std::vector<processor_id_type> & new_procids)
1187  {
1188  filled_request[pid] = new_procids;
1189  };
1190 
1191  // Trade requests with other processors
1192  const processor_id_type * ex = nullptr;
1194  (mesh.comm(), requested_ids, gather_functor, action_functor, ex);
1195 
1196  // and finally assign the partitioning.
1197  // note we are iterating in exactly the same order
1198  // used to build up the request, so we can expect the
1199  // required entries to be in the proper sequence.
1200  std::vector<unsigned int> counters(mesh.n_processors(), 0);
1201  for (auto & elem : mesh.active_element_ptr_range())
1202  {
1203  const processor_id_type current_pid = elem->processor_id();
1204 
1205  libmesh_assert_less (counters[current_pid], requested_ids[current_pid].size());
1206 
1207  const processor_id_type elem_procid =
1208  filled_request[current_pid][counters[current_pid]++];
1209 
1210  libmesh_assert_less (elem_procid, mesh.n_partitions());
1211  elem->processor_id() = elem_procid;
1212  }
1213 }
1214 
1215 
1216 } // namespace libMesh
void find_global_indices(const Parallel::Communicator &communicator, const libMesh::BoundingBox &, const ForwardIterator &, const ForwardIterator &, std::vector< dof_id_type > &) const
void single_partition(MeshBase &mesh)
Definition: partitioner.C:159
const Elem * parent() const
Definition: elem.h:2479
static void set_interface_node_processor_ids_petscpartitioner(MeshBase &mesh)
Definition: partitioner.C:575
A geometric point in (x,y,z) space associated with a DOF.
Definition: node.h:52
std::unordered_map< dof_id_type, dof_id_type > _global_index_by_pid_map
Definition: partitioner.h:272
void libmesh_assert_valid_remote_elems(const MeshBase &mesh)
Definition: mesh_tools.C:1247
static void set_interface_node_processor_ids_BFS(MeshBase &mesh)
Definition: partitioner.C:498
dof_id_type n_elem(const MeshBase::const_element_iterator &begin, const MeshBase::const_element_iterator &end)
Definition: mesh_tools.C:702
libMesh::BoundingBox create_bounding_box(const MeshBase &mesh)
Definition: mesh_tools.C:386
static void set_node_processor_ids(MeshBase &mesh)
Definition: partitioner.C:679
void act_on_data(const std::vector< dof_id_type > &ids, const std::vector< datum > &local_ids)
Definition: partitioner.C:899
static void set_interface_node_processor_ids_linear(MeshBase &mesh)
Definition: partitioner.C:474
The base class for all geometric element types.
Definition: elem.h:100
MeshBase & mesh
uint8_t processor_id_type
Definition: id_types.h:99
void find_nodal_neighbors(const MeshBase &mesh, const Node &n, const std::vector< std::vector< const Elem *>> &nodes_to_elem_map, std::vector< const Node *> &neighbors)
Definition: mesh_tools.C:740
void build_nodes_to_elem_map(const MeshBase &mesh, std::vector< std::vector< dof_id_type >> &nodes_to_elem_map)
Definition: mesh_tools.C:245
IterBase * end
void assign_partitioning(const MeshBase &mesh, const std::vector< dof_id_type > &parts)
Definition: partitioner.C:1113
virtual void build_graph(const MeshBase &mesh)
Definition: partitioner.C:950
long double max(long double a, double b)
void total_family_tree(std::vector< const Elem *> &active_family, const bool reset=true) const
Definition: elem.C:1557
std::unordered_map< dof_id_type, dof_id_type > map_type
Definition: partitioner.C:884
processor_id_type choose_processor_id(processor_id_type pid1, processor_id_type pid2) const
Definition: node.C:78
std::vector< Elem * > _local_id_to_elem
Definition: partitioner.h:291
Base class for Mesh.
Definition: mesh_base.h:77
SimpleRange< ChildRefIter > child_ref_range()
Definition: elem.h:1779
virtual void _do_repartition(MeshBase &mesh, const unsigned int n)
Definition: partitioner.h:237
virtual element_iterator elements_begin()=0
void repartition(MeshBase &mesh, const unsigned int n)
Definition: partitioner.C:124
void libmesh_ignore(const Args &...)
const dof_id_type n_nodes
Definition: tecplot_io.C:68
void single_partition_range(MeshBase::element_iterator it, MeshBase::element_iterator end)
Definition: partitioner.C:172
dof_id_type id() const
Definition: dof_object.h:655
void pull_parallel_vector_data(const Communicator &comm, const MapToVectors &queries, RequestContainer &reqs, GatherFunctor &gather_data, ActionFunctor &act_on_data, const datum *example)
static const processor_id_type invalid_processor_id
Definition: dof_object.h:358
static void processor_pairs_to_interface_nodes(MeshBase &mesh, std::map< std::pair< processor_id_type, processor_id_type >, std::set< dof_id_type >> &processor_pair_to_nodes)
Definition: partitioner.C:421
SimpleRange< I > as_range(const std::pair< I, I > &p)
Definition: simple_range.h:57
void find_local_indices(const libMesh::BoundingBox &, const ForwardIterator &, const ForwardIterator &, std::unordered_map< dof_id_type, dof_id_type > &) const
virtual void _find_global_index_by_pid_map(const MeshBase &mesh)
Definition: partitioner.C:907
void sync_dofobject_data_by_id(const Communicator &comm, const Iterator &range_begin, const Iterator &range_end, SyncFunctor &sync)
void invalidate_processor_id()
Definition: dof_object.h:603
virtual void _do_partition(MeshBase &mesh, const unsigned int n)=0
static void partition_unpartitioned_elements(MeshBase &mesh)
Definition: partitioner.C:187
const Elem * neighbor_ptr(unsigned int i) const
Definition: elem.h:2050
static const dof_id_type communication_blocksize
Definition: partitioner.h:244
virtual void partition(MeshBase &mesh, const unsigned int n)
Definition: partitioner.C:57
void gather_data(const std::vector< dof_id_type > &ids, std::vector< datum > &local_ids) const
Definition: partitioner.C:890
static void set_parent_processor_ids(MeshBase &mesh)
Definition: partitioner.C:268
bool on_command_line(std::string arg)
Definition: libmesh.C:876
IterBase * data
bool active() const
Definition: elem.h:2390
processor_id_type processor_id() const
Definition: dof_object.h:717
long double min(long double a, double b)
std::vector< std::vector< dof_id_type > > _dual_graph
Definition: partitioner.h:288
std::vector< dof_id_type > _n_active_elem_on_proc
Definition: partitioner.h:281
SyncLocalIDs(map_type &_id_map)
Definition: partitioner.C:886
uint8_t dof_id_type
Definition: id_types.h:64