parmetis_partitioner.C
Go to the documentation of this file.
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2018 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 
// Local Includes
#include "libmesh/libmesh_config.h"
#include "libmesh/elem.h"
#include "libmesh/libmesh_logging.h"
#include "libmesh/mesh_base.h"
#include "libmesh/mesh_communication.h"
#include "libmesh/mesh_serializer.h"
#include "libmesh/mesh_tools.h"
#include "libmesh/metis_partitioner.h"
#include "libmesh/parallel.h" // also includes mpi.h
#include "libmesh/parmetis_helper.h"
#include "libmesh/parmetis_partitioner.h"
33 
34 // Include the ParMETIS header file.
35 #ifdef LIBMESH_HAVE_PARMETIS
36 
37 namespace Parmetis {
38 extern "C" {
39 # include "libmesh/ignore_warnings.h"
40 # include "parmetis.h"
42 }
43 }
44 
45 #endif
46 
47 
48 // C++ includes
49 #include <unordered_map>
50 
51 
52 namespace libMesh
53 {
54 
// Minimum number of active elements required on each processor for
// us to choose Parmetis over Metis.  Parmetis tends to crash when
// given only a couple of elements per partition, so below this
// threshold _do_repartition() falls back to a serial METIS
// partitioning instead.
#ifdef LIBMESH_HAVE_PARMETIS
const unsigned int MIN_ELEM_PER_PROC = 4;
#endif
60 
61 // ------------------------------------------------------------
62 // ParmetisPartitioner implementation
64 #ifdef LIBMESH_HAVE_PARMETIS
65  : _pmetis(libmesh_make_unique<ParmetisHelper>())
66 #endif
67 {}
68 
69 
70 
// Copy constructor.  Copies the base-class Partitioner state and,
// when ParMETIS support is compiled in, deep-copies the helper
// object so that the two partitioners never share ParMETIS data
// structures.
ParmetisPartitioner::ParmetisPartitioner (const ParmetisPartitioner & other)
  : Partitioner(other)
#ifdef LIBMESH_HAVE_PARMETIS
  , _pmetis(libmesh_make_unique<ParmetisHelper>(*(other._pmetis)))
#endif
{
}
78 
79 
80 
// Defaulted out-of-line (rather than in the header) so that the
// unique_ptr<ParmetisHelper> member is destroyed in this translation
// unit, where ParmetisHelper is a complete type.
ParmetisPartitioner::~ParmetisPartitioner() = default;
82 
83 
84 
// Partition the mesh into n_sbdmns subdomains.  Partitioning from
// scratch is implemented simply as a repartition: all the real work
// (including all fallback paths) lives in _do_repartition().
void ParmetisPartitioner::_do_partition (MeshBase & mesh,
                                         const unsigned int n_sbdmns)
{
  this->_do_repartition (mesh, n_sbdmns);
}
90 
91 
92 
// Repartition the mesh into n_sbdmns subdomains using ParMETIS's
// adaptive repartitioning.  Falls back to (serial) METIS when the
// library was built without ParMETIS, when running on a single
// processor, or when some processor has fewer than
// MIN_ELEM_PER_PROC active elements (ParMETIS is unreliable in that
// regime).  Collective: must be called on all processors at once.
void ParmetisPartitioner::_do_repartition (MeshBase & mesh,
                                           const unsigned int n_sbdmns)
{
  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  // Check for easy returns: nothing to partition, ...
  if (!mesh.n_elem())
    return;

  // ... or everything goes to a single subdomain.
  if (n_sbdmns == 1)
    {
      this->single_partition(mesh);
      return;
    }

  libmesh_assert_greater (n_sbdmns, 0);

  // What to do if the Parmetis library IS NOT present
#ifndef LIBMESH_HAVE_PARMETIS

  libmesh_do_once(
  libMesh::out << "ERROR: The library has been built without" << std::endl
               << "Parmetis support. Using a Metis" << std::endl
               << "partitioner instead!" << std::endl;);

  MetisPartitioner mp;

  // Don't just call partition() here; that would end up calling
  // post-element-partitioning work redundantly (and at the moment
  // incorrectly)
  mp.partition_range (mesh, mesh.active_elements_begin(),
                      mesh.active_elements_end(), n_sbdmns);

  // What to do if the Parmetis library IS present
#else

  // Revert to METIS on one processor.
  if (mesh.n_processors() == 1)
    {
      // Make sure the mesh knows it's serial
      mesh.allgather();

      MetisPartitioner mp;
      // Don't just call partition() here; that would end up calling
      // post-element-partitioning work redundantly (and at the moment
      // incorrectly)
      mp.partition_range (mesh, mesh.active_elements_begin(),
                          mesh.active_elements_end(), n_sbdmns);
      return;
    }

  LOG_SCOPE("repartition()", "ParmetisPartitioner");

  // Initialize the data structures required by ParMETIS
  // (vtxdist, weights, options, and the initial partition array).
  this->initialize (mesh, n_sbdmns);

  // Make sure all processors have enough active local elements.
  // Parmetis tends to crash when it's given only a couple elements
  // per partition.
  {
    bool all_have_enough_elements = true;
    for (std::size_t pid=0; pid<_n_active_elem_on_proc.size(); pid++)
      if (_n_active_elem_on_proc[pid] < MIN_ELEM_PER_PROC)
        all_have_enough_elements = false;

    // Parmetis will not work unless each processor has some
    // elements. Specifically, it will abort when passed a nullptr
    // partition array on *any* of the processors.
    if (!all_have_enough_elements)
      {
        // FIXME: revert to METIS, although this requires a serial mesh
        MeshSerializer serialize(mesh);
        MetisPartitioner mp;
        mp.partition (mesh, n_sbdmns);
        return;
      }
  }

  // build the graph corresponding to the mesh
  this->build_graph (mesh);


  // Partition the graph.  vsize gives every vertex unit
  // redistribution cost; itr is ParMETIS's ratio of communication
  // time to redistribution time -- a very large value favors
  // minimizing edge-cut over minimizing data movement.
  std::vector<Parmetis::idx_t> vsize(_pmetis->vwgt.size(), 1);
  Parmetis::real_t itr = 1000000.0;
  MPI_Comm mpi_comm = mesh.comm().get();

  // Call the ParMETIS adaptive repartitioning method. This respects the
  // original partitioning when computing the new partitioning so as to
  // minimize the required data redistribution.  Empty arrays are passed
  // as nullptr, which ParMETIS expects for absent optional arguments.
  Parmetis::ParMETIS_V3_AdaptiveRepart(_pmetis->vtxdist.empty() ? nullptr : _pmetis->vtxdist.data(),
                                       _pmetis->xadj.empty() ? nullptr : _pmetis->xadj.data(),
                                       _pmetis->adjncy.empty() ? nullptr : _pmetis->adjncy.data(),
                                       _pmetis->vwgt.empty() ? nullptr : _pmetis->vwgt.data(),
                                       vsize.empty() ? nullptr : vsize.data(),
                                       nullptr,
                                       &_pmetis->wgtflag,
                                       &_pmetis->numflag,
                                       &_pmetis->ncon,
                                       &_pmetis->nparts,
                                       _pmetis->tpwgts.empty() ? nullptr : _pmetis->tpwgts.data(),
                                       _pmetis->ubvec.empty() ? nullptr : _pmetis->ubvec.data(),
                                       &itr,
                                       _pmetis->options.data(),
                                       &_pmetis->edgecut,
                                       _pmetis->part.empty() ? nullptr : reinterpret_cast<Parmetis::idx_t *>(_pmetis->part.data()),
                                       &mpi_comm);

  // Assign the returned processor ids
  this->assign_partitioning (mesh, _pmetis->part);

#endif // #ifndef LIBMESH_HAVE_PARMETIS ... else ...

}
208 
209 
210 
211 // Only need to compile these methods if ParMETIS is present
212 #ifdef LIBMESH_HAVE_PARMETIS
213 
214 void ParmetisPartitioner::initialize (const MeshBase & mesh,
215  const unsigned int n_sbdmns)
216 {
217  LOG_SCOPE("initialize()", "ParmetisPartitioner");
218 
219  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
220  // Set parameters.
221  _pmetis->wgtflag = 2; // weights on vertices only
222  _pmetis->ncon = 1; // one weight per vertex
223  _pmetis->numflag = 0; // C-style 0-based numbering
224  _pmetis->nparts = static_cast<Parmetis::idx_t>(n_sbdmns); // number of subdomains to create
225  _pmetis->edgecut = 0; // the numbers of edges cut by the
226  // partition
227 
228  // Initialize data structures for ParMETIS
229  _pmetis->vtxdist.assign (mesh.n_processors()+1, 0);
230  _pmetis->tpwgts.assign (_pmetis->nparts, 1./_pmetis->nparts);
231  _pmetis->ubvec.assign (_pmetis->ncon, 1.05);
232  _pmetis->part.assign (n_active_local_elem, 0);
233  _pmetis->options.resize (5);
234  _pmetis->vwgt.resize (n_active_local_elem);
235 
236  // Set the options
237  _pmetis->options[0] = 1; // don't use default options
238  _pmetis->options[1] = 0; // default (level of timing)
239  _pmetis->options[2] = 15; // random seed (default)
240  _pmetis->options[3] = 2; // processor distribution and subdomain distribution are decoupled
241 
242  // ParMetis expects the elements to be numbered in contiguous blocks
243  // by processor, i.e. [0, ne0), [ne0, ne0+ne1), ...
244  // Since we only partition active elements we should have no expectation
245  // that we currently have such a distribution. So we need to create it.
246  // Also, at the same time we are going to map all the active elements into a globally
247  // unique range [0,n_active_elem) which is *independent* of the current partitioning.
248  // This can be fed to ParMetis as the initial partitioning of the subdomains (decoupled
249  // from the partitioning of the objects themselves). This allows us to get the same
250  // resultant partitioning independent of the input partitioning.
251  libMesh::BoundingBox bbox =
253 
254  _find_global_index_by_pid_map(mesh);
255 
256 
257  // count the total number of active elements in the mesh. Note we cannot
258  // use mesh.n_active_elem() in general since this only returns the number
259  // of active elements which are stored on the calling processor.
260  // We should not use n_active_elem for any allocation because that will
261  // be inherently unscalable, but it can be useful for libmesh_assertions.
262  dof_id_type n_active_elem=0;
263 
264  // Set up the vtxdist array. This will be the same on each processor.
265  // ***** Consult the Parmetis documentation. *****
266  libmesh_assert_equal_to (_pmetis->vtxdist.size(),
267  cast_int<std::size_t>(mesh.n_processors()+1));
268  libmesh_assert_equal_to (_pmetis->vtxdist[0], 0);
269 
270  for (processor_id_type pid=0; pid<mesh.n_processors(); pid++)
271  {
272  _pmetis->vtxdist[pid+1] = _pmetis->vtxdist[pid] + _n_active_elem_on_proc[pid];
273  n_active_elem += _n_active_elem_on_proc[pid];
274  }
275  libmesh_assert_equal_to (_pmetis->vtxdist.back(), static_cast<Parmetis::idx_t>(n_active_elem));
276 
277 
278  // Maps active element ids into a contiguous range independent of partitioning.
279  // (only needs local scope)
280  std::unordered_map<dof_id_type, dof_id_type> global_index_map;
281 
282  {
283  std::vector<dof_id_type> global_index;
284 
285  // create the unique mapping for all active elements independent of partitioning
286  {
287  MeshBase::const_element_iterator it = mesh.active_elements_begin();
288  const MeshBase::const_element_iterator end = mesh.active_elements_end();
289 
290  // Calling this on all processors a unique range in [0,n_active_elem) is constructed.
291  // Only the indices for the elements we pass in are returned in the array.
293  bbox, it, end,
294  global_index);
295 
296  for (dof_id_type cnt=0; it != end; ++it)
297  {
298  const Elem * elem = *it;
299  // vectormap::count forces a sort, which is too expensive
300  // in a loop
301  // libmesh_assert (!global_index_map.count(elem->id()));
302  libmesh_assert_less (cnt, global_index.size());
303  libmesh_assert_less (global_index[cnt], n_active_elem);
304 
305  global_index_map.insert(std::make_pair(elem->id(), global_index[cnt++]));
306  }
307  }
308  // really, shouldn't be close!
309  libmesh_assert_less_equal (global_index_map.size(), n_active_elem);
310  libmesh_assert_less_equal (_global_index_by_pid_map.size(), n_active_elem);
311 
312  // At this point the two maps should be the same size. If they are not
313  // then the number of active elements is not the same as the sum over all
314  // processors of the number of active elements per processor, which means
315  // there must be some unpartitioned objects out there.
316  if (global_index_map.size() != _global_index_by_pid_map.size())
317  libmesh_error_msg("ERROR: ParmetisPartitioner cannot handle unpartitioned objects!");
318  }
319 
320  // Finally, we need to initialize the vertex (partition) weights and the initial subdomain
321  // mapping. The subdomain mapping will be independent of the processor mapping, and is
322  // defined by a simple mapping of the global indices we just found.
323  {
324  std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());
325 
326  const dof_id_type first_local_elem = _pmetis->vtxdist[mesh.processor_id()];
327 
328  for (processor_id_type pid=0; pid<mesh.n_processors(); pid++)
329  {
330  dof_id_type tgt_subdomain_size = 0;
331 
332  // watch out for the case that n_subdomains < n_processors
333  if (pid < static_cast<unsigned int>(_pmetis->nparts))
334  {
335  tgt_subdomain_size = n_active_elem/std::min
336  (cast_int<Parmetis::idx_t>(mesh.n_processors()), _pmetis->nparts);
337 
338  if (pid < n_active_elem%_pmetis->nparts)
339  tgt_subdomain_size++;
340  }
341  if (pid == 0)
342  subdomain_bounds[0] = tgt_subdomain_size;
343  else
344  subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
345  }
346 
347  libmesh_assert_equal_to (subdomain_bounds.back(), n_active_elem);
348 
349  for (const auto & elem : mesh.active_local_element_ptr_range())
350  {
351  libmesh_assert (_global_index_by_pid_map.count(elem->id()));
352  const dof_id_type global_index_by_pid =
353  _global_index_by_pid_map[elem->id()];
354  libmesh_assert_less (global_index_by_pid, n_active_elem);
355 
356  const dof_id_type local_index =
357  global_index_by_pid - first_local_elem;
358 
359  libmesh_assert_less (local_index, n_active_local_elem);
360  libmesh_assert_less (local_index, _pmetis->vwgt.size());
361 
362  // TODO:[BSK] maybe there is a better weight?
363  _pmetis->vwgt[local_index] = elem->n_nodes();
364 
365  // find the subdomain this element belongs in
366  libmesh_assert (global_index_map.count(elem->id()));
367  const dof_id_type global_index =
368  global_index_map[elem->id()];
369 
370  libmesh_assert_less (global_index, subdomain_bounds.back());
371 
372  const unsigned int subdomain_id =
373  cast_int<unsigned int>
374  (std::distance(subdomain_bounds.begin(),
375  std::lower_bound(subdomain_bounds.begin(),
376  subdomain_bounds.end(),
377  global_index)));
378  libmesh_assert_less (subdomain_id, static_cast<unsigned int>(_pmetis->nparts));
379  libmesh_assert_less (local_index, _pmetis->part.size());
380 
381  _pmetis->part[local_index] = subdomain_id;
382  }
383  }
384 }
385 
386 
387 
388 void ParmetisPartitioner::build_graph (const MeshBase & mesh)
389 {
390  LOG_SCOPE("build_graph()", "ParmetisPartitioner");
391 
392  // build the graph in distributed CSR format. Note that
393  // the edges in the graph will correspond to
394  // face neighbors
395  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
396 
397  Partitioner::build_graph(mesh);
398 
399  dof_id_type graph_size=0;
400 
401  for (auto & row: _dual_graph)
402  graph_size += cast_int<dof_id_type>(row.size());
403 
404  // Reserve space in the adjacency array
405  _pmetis->xadj.clear();
406  _pmetis->xadj.reserve (n_active_local_elem + 1);
407  _pmetis->adjncy.clear();
408  _pmetis->adjncy.reserve (graph_size);
409 
410  for (auto & graph_row : _dual_graph)
411  {
412  _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));
413  _pmetis->adjncy.insert(_pmetis->adjncy.end(),
414  graph_row.begin(),
415  graph_row.end());
416  }
417 
418  // The end of the adjacency array for the last elem
419  _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));
420 
421  libmesh_assert_equal_to (_pmetis->xadj.size(), n_active_local_elem+1);
422  libmesh_assert_equal_to (_pmetis->adjncy.size(), graph_size);
423 }
424 
425 #endif // #ifdef LIBMESH_HAVE_PARMETIS
426 
427 } // namespace libMesh
void find_global_indices(const Parallel::Communicator &communicator, const libMesh::BoundingBox &, const ForwardIterator &, const ForwardIterator &, std::vector< dof_id_type > &) const
libMesh::BoundingBox create_bounding_box(const MeshBase &mesh)
Definition: mesh_tools.C:386
virtual void partition_range(MeshBase &mesh, MeshBase::element_iterator it, MeshBase::element_iterator end, const unsigned int n) override
The base class for all geometric element types.
Definition: elem.h:100
MeshBase & mesh
uint8_t processor_id_type
Definition: id_types.h:99
IterBase * end
Partitioner which interfaces with the METIS library.
Base class for Mesh.
Definition: mesh_base.h:77
dof_id_type id() const
Definition: dof_object.h:655
const unsigned int MIN_ELEM_PER_PROC
Temporarily serializes a DistributedMesh for output.
virtual void partition(MeshBase &mesh, const unsigned int n)
Definition: partitioner.C:57
OStreamProxy out(std::cout)
long double min(long double a, double b)
uint8_t dof_id_type
Definition: id_types.h:64