sparsity_pattern.C
Go to the documentation of this file.
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2018 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 
20 // Local Includes
22 #include "libmesh/dof_map.h"
23 #include "libmesh/elem.h"
26 #include "libmesh/communicator.h"
28 
29 
30 namespace libMesh
31 {
32 namespace SparsityPattern
33 {
34 
35 //-------------------------------------------------------
36 // we need to implement these constructors here so that
37 // a full DofMap definition is available.
38 Build::Build (const MeshBase & mesh_in,
39  const DofMap & dof_map_in,
40  const CouplingMatrix * dof_coupling_in,
41  std::set<GhostingFunctor *> coupling_functors_in,
42  const bool implicit_neighbor_dofs_in,
43  const bool need_full_sparsity_pattern_in) :
44  ParallelObject(dof_map_in),
45  mesh(mesh_in),
46  dof_map(dof_map_in),
47  dof_coupling(dof_coupling_in),
48  coupling_functors(coupling_functors_in),
49  implicit_neighbor_dofs(implicit_neighbor_dofs_in),
50  need_full_sparsity_pattern(need_full_sparsity_pattern_in),
51  sparsity_pattern(),
52  nonlocal_pattern(),
53  n_nz(),
54  n_oz()
55 {}
56 
57 
58 
60  ParallelObject(other),
61  mesh(other.mesh),
62  dof_map(other.dof_map),
63  dof_coupling(other.dof_coupling),
64  coupling_functors(other.coupling_functors),
65  implicit_neighbor_dofs(other.implicit_neighbor_dofs),
66  need_full_sparsity_pattern(other.need_full_sparsity_pattern),
67  sparsity_pattern(),
68  nonlocal_pattern(),
69  n_nz(),
70  n_oz()
71 {}
72 
73 
74 
75 #if defined(__GNUC__) && (__GNUC__ < 4) && !defined(__INTEL_COMPILER)
76 
77 void _dummy_function(void) {}
78 
79 #endif
80 
81 
82 
84  std::vector<dof_id_type> & dofs_vi,
85  unsigned int vi)
86 {
87  dof_map.dof_indices (elem, dofs_vi, vi);
88 #ifdef LIBMESH_ENABLE_CONSTRAINTS
89  dof_map.find_connected_dofs (dofs_vi);
90 #endif
91  // We can be more efficient if we sort the element DOFs into
92  // increasing order
93  std::sort(dofs_vi.begin(), dofs_vi.end());
94 }
95 
96 
97 
98 void Build::handle_vi_vj(const std::vector<dof_id_type> & element_dofs_i,
99  const std::vector<dof_id_type> & element_dofs_j)
100 {
101  const unsigned int n_dofs_on_element_i =
102  cast_int<unsigned int>(element_dofs_i.size());
103 
104  const processor_id_type proc_id = mesh.processor_id();
105  const dof_id_type first_dof_on_proc = dof_map.first_dof(proc_id);
106  const dof_id_type end_dof_on_proc = dof_map.end_dof(proc_id);
107 
108  std::vector<dof_id_type>
109  dofs_to_add;
110 
111  const unsigned int n_dofs_on_element_j =
112  cast_int<unsigned int>(element_dofs_j.size());
113 
114  // there might be 0 dofs for the other variable on the same element
115  // (when subdomain variables do not overlap) and that's when we do
116  // not do anything
117  if (n_dofs_on_element_j > 0)
118  {
119  for (unsigned int i=0; i<n_dofs_on_element_i; i++)
120  {
121  const dof_id_type ig = element_dofs_i[i];
122 
123  SparsityPattern::Row * row;
124 
125  // We save non-local row components for now so we can
126  // communicate them to other processors later.
127 
128  if ((ig >= first_dof_on_proc) &&
129  (ig < end_dof_on_proc))
130  {
131  // This is what I mean
132  // libmesh_assert_greater_equal ((ig - first_dof_on_proc), 0);
133  // but do the test like this because ig and
134  // first_dof_on_proc are unsigned ints
135  libmesh_assert_greater_equal (ig, first_dof_on_proc);
136  libmesh_assert_less (ig, (sparsity_pattern.size() +
137  first_dof_on_proc));
138 
139  row = &sparsity_pattern[ig - first_dof_on_proc];
140  }
141  else
142  {
143  row = &nonlocal_pattern[ig];
144  }
145 
146  // If the row is empty we will add *all*
147  // the element j DOFs, so just do that.
148  if (row->empty())
149  {
150  row->insert(row->end(),
151  element_dofs_j.begin(),
152  element_dofs_j.end());
153  }
154  else
155  {
156  // Build a list of the DOF indices not found in the
157  // sparsity pattern
158  dofs_to_add.clear();
159 
160  // Cache iterators. Low will move forward, subsequent
161  // searches will be on smaller ranges
162  SparsityPattern::Row::iterator
163  low = std::lower_bound
164  (row->begin(), row->end(), element_dofs_j.front()),
165  high = std::upper_bound
166  (low, row->end(), element_dofs_j.back());
167 
168  for (unsigned int j=0; j<n_dofs_on_element_j; j++)
169  {
170  const dof_id_type jg = element_dofs_j[j];
171 
172  // See if jg is in the sorted range
173  std::pair<SparsityPattern::Row::iterator,
174  SparsityPattern::Row::iterator>
175  pos = std::equal_range (low, high, jg);
176 
177  // Must add jg if it wasn't found
178  if (pos.first == pos.second)
179  dofs_to_add.push_back(jg);
180 
181  // pos.first is now a valid lower bound for any
182  // remaining element j DOFs. (That's why we sorted them.)
183  // Use it for the next search
184  low = pos.first;
185  }
186 
187  // Add to the sparsity pattern
188  if (!dofs_to_add.empty())
189  {
190  const std::size_t old_size = row->size();
191 
192  row->insert (row->end(),
193  dofs_to_add.begin(),
194  dofs_to_add.end());
195 
197  (row->begin(), row->begin()+old_size,
198  row->end());
199  }
200  }
201  } // End dofs-of-var-i loop
202  } // End if-dofs-of-var-j
203 }
204 
205 
206 
208 {
209  // Compute the sparsity structure of the global matrix. This can be
210  // fed into a PetscMatrix to allocate exactly the number of nonzeros
211  // necessary to store the matrix. This algorithm should be linear
212  // in the (# of elements)*(# nodes per element)
213  const processor_id_type proc_id = mesh.processor_id();
214  const dof_id_type n_dofs_on_proc = dof_map.n_dofs_on_processor(proc_id);
215  const dof_id_type first_dof_on_proc = dof_map.first_dof(proc_id);
216  const dof_id_type end_dof_on_proc = dof_map.end_dof(proc_id);
217 
218  sparsity_pattern.resize(n_dofs_on_proc);
219 
220  // Handle dof coupling specified by library and user coupling functors
221  {
222  const unsigned int n_var = dof_map.n_variables();
223 
224  std::vector<std::vector<dof_id_type> > element_dofs_i(n_var);
225 
226  std::vector<const Elem *> coupled_neighbors;
227  for (const auto & elem : range)
228  {
229  // Make some fake element iterators defining a range
230  // pointing to only this element.
231  Elem * const * elempp = const_cast<Elem * const *>(&elem);
232  Elem * const * elemend = elempp+1;
233 
234  const MeshBase::const_element_iterator fake_elem_it =
236  elemend,
238 
239  const MeshBase::const_element_iterator fake_elem_end =
241  elemend,
243 
244  GhostingFunctor::map_type elements_to_couple;
245 
246  // Man, I wish we had guaranteed unique_ptr availability...
247  std::set<CouplingMatrix *> temporary_coupling_matrices;
248 
249  dof_map.merge_ghost_functor_outputs(elements_to_couple,
250  temporary_coupling_matrices,
253  fake_elem_it,
254  fake_elem_end,
256  for (unsigned int vi=0; vi<n_var; vi++)
257  this->sorted_connected_dofs(elem, element_dofs_i[vi], vi);
258 
259  for (unsigned int vi=0; vi<n_var; vi++)
260  for (const auto & pr : elements_to_couple)
261  {
262  const Elem * const partner = pr.first;
263  const CouplingMatrix * ghost_coupling = pr.second;
264 
265  // Loop over coupling matrix row variables if we have a
266  // coupling matrix, or all variables if not.
267  if (ghost_coupling)
268  {
269  libmesh_assert_equal_to (ghost_coupling->size(), n_var);
270  ConstCouplingRow ccr(vi, *ghost_coupling);
271 
272  for (const auto & idx : ccr)
273  {
274  if (partner == elem)
275  this->handle_vi_vj(element_dofs_i[vi], element_dofs_i[idx]);
276  else
277  {
278  std::vector<dof_id_type> partner_dofs;
279  this->sorted_connected_dofs(partner, partner_dofs, idx);
280  this->handle_vi_vj(element_dofs_i[vi], partner_dofs);
281  }
282  }
283  }
284  else
285  {
286  for (unsigned int vj = 0; vj != n_var; ++vj)
287  {
288  if (partner == elem)
289  this->handle_vi_vj(element_dofs_i[vi], element_dofs_i[vj]);
290  else
291  {
292  std::vector<dof_id_type> partner_dofs;
293  this->sorted_connected_dofs(partner, partner_dofs, vj);
294  this->handle_vi_vj(element_dofs_i[vi], partner_dofs);
295  }
296  }
297  }
298  } // End ghosted element loop
299 
300  for (auto & mat : temporary_coupling_matrices)
301  delete mat;
302 
303  } // End range element loop
304  } // End ghosting functor section
305 
306  // Now a new chunk of sparsity structure is built for all of the
307  // DOFs connected to our rows of the matrix.
308 
309  // If we're building a full sparsity pattern, then we've got
310  // complete rows to work with, so we can just count them from
311  // scratch.
313  {
314  n_nz.clear();
315  n_oz.clear();
316  }
317 
318  n_nz.resize (n_dofs_on_proc, 0);
319  n_oz.resize (n_dofs_on_proc, 0);
320 
321  for (dof_id_type i=0; i<n_dofs_on_proc; i++)
322  {
323  // Get the row of the sparsity pattern
325 
326  for (const auto & df : row)
327  if ((df < first_dof_on_proc) || (df >= end_dof_on_proc))
328  n_oz[i]++;
329  else
330  n_nz[i]++;
331 
332  // If we're not building a full sparsity pattern, then we want
333  // to avoid overcounting these entries as much as possible.
335  row.clear();
336  }
337 }
338 
339 
340 
342 {
343  const processor_id_type proc_id = mesh.processor_id();
344  const dof_id_type n_global_dofs = dof_map.n_dofs();
345  const dof_id_type n_dofs_on_proc = dof_map.n_dofs_on_processor(proc_id);
346  const dof_id_type first_dof_on_proc = dof_map.first_dof(proc_id);
347  const dof_id_type end_dof_on_proc = dof_map.end_dof(proc_id);
348 
349  libmesh_assert_equal_to (sparsity_pattern.size(), other.sparsity_pattern.size());
350  libmesh_assert_equal_to (n_nz.size(), sparsity_pattern.size());
351  libmesh_assert_equal_to (n_oz.size(), sparsity_pattern.size());
352 
353  for (dof_id_type r=0; r<n_dofs_on_proc; r++)
354  {
355  // increment the number of on and off-processor nonzeros in this row
356  // (note this will be an upper bound unless we need the full sparsity pattern)
358  {
360  const SparsityPattern::Row & their_row = other.sparsity_pattern[r];
361 
362  // simple copy if I have no dofs
363  if (my_row.empty())
364  my_row = their_row;
365 
366  // otherwise add their DOFs to mine, resort, and re-unique the row
367  else if (!their_row.empty()) // do nothing for the trivial case where
368  { // their row is empty
369  my_row.insert (my_row.end(),
370  their_row.begin(),
371  their_row.end());
372 
373  // We cannot use SparsityPattern::sort_row() here because it expects
374  // the [begin,middle) [middle,end) to be non-overlapping. This is not
375  // necessarily the case here, so use std::sort()
376  std::sort (my_row.begin(), my_row.end());
377 
378  my_row.erase(std::unique (my_row.begin(), my_row.end()), my_row.end());
379  }
380 
381  // fix the number of on and off-processor nonzeros in this row
382  n_nz[r] = n_oz[r] = 0;
383 
384  for (const auto & df : my_row)
385  if ((df < first_dof_on_proc) || (df >= end_dof_on_proc))
386  n_oz[r]++;
387  else
388  n_nz[r]++;
389  }
390  else
391  {
392  n_nz[r] += other.n_nz[r];
393  n_nz[r] = std::min(n_nz[r], n_dofs_on_proc);
394  n_oz[r] += other.n_oz[r];
395  n_oz[r] =std::min(n_oz[r], static_cast<dof_id_type>(n_global_dofs-n_nz[r]));
396  }
397  }
398 
399  // Move nonlocal row information to ourselves; the other thread
400  // won't need it in the map after that.
401  for (const auto & p : other.nonlocal_pattern)
402  {
403 #ifndef NDEBUG
404  const dof_id_type dof_id = p.first;
405 
406  processor_id_type dbg_proc_id = 0;
407  while (dof_id >= dof_map.end_dof(dbg_proc_id))
408  dbg_proc_id++;
409  libmesh_assert (dbg_proc_id != this->processor_id());
410 #endif
411 
412  const SparsityPattern::Row & their_row = p.second;
413 
414  // We should have no empty values in a map
415  libmesh_assert (!their_row.empty());
416 
417  NonlocalGraph::iterator my_it = nonlocal_pattern.find(p.first);
418  if (my_it == nonlocal_pattern.end())
419  {
420  // nonlocal_pattern[it->first].swap(their_row);
421  nonlocal_pattern[p.first] = their_row;
422  }
423  else
424  {
425  SparsityPattern::Row & my_row = my_it->second;
426 
427  my_row.insert (my_row.end(),
428  their_row.begin(),
429  their_row.end());
430 
431  // We cannot use SparsityPattern::sort_row() here because it expects
432  // the [begin,middle) [middle,end) to be non-overlapping. This is not
433  // necessarily the case here, so use std::sort()
434  std::sort (my_row.begin(), my_row.end());
435 
436  my_row.erase(std::unique (my_row.begin(), my_row.end()), my_row.end());
437  }
438  }
439 }
440 
441 
442 
444 {
445  parallel_object_only();
446  libmesh_assert(this->comm().verify(need_full_sparsity_pattern));
447 
448  auto & comm = this->comm();
449  auto pid = comm.rank();
450  auto num_procs = comm.size();
451 
452  auto dof_tag = comm.get_unique_tag(998);
453  auto row_tag = comm.get_unique_tag(9998);
454 
455  const auto n_global_dofs = dof_map.n_dofs();
456  const auto n_dofs_on_proc = dof_map.n_dofs_on_processor(pid);
457  const auto local_first_dof = dof_map.first_dof();
458  const auto local_end_dof = dof_map.end_dof();
459 
460  // The data to send
461  std::map<processor_id_type, std::pair<std::vector<dof_id_type>, std::vector<Row>>> data_to_send;
462 
463  // True/false if we're sending to that processor
464  std::vector<char> will_send_to(num_procs);
465 
466  processor_id_type num_sends = 0;
467 
468  // Loop over the nonlocal rows and transform them into the new datastructure
469  NonlocalGraph::iterator it = nonlocal_pattern.begin();
470  while (it != nonlocal_pattern.end())
471  {
472  const auto dof_id = it->first;
473  auto & row = it->second;
474 
475  processor_id_type proc_id = 0;
476  while (dof_id >= dof_map.end_dof(proc_id))
477  proc_id++;
478 
479  // Count the unique sends
480  if (!will_send_to[proc_id])
481  num_sends++;
482 
483  will_send_to[proc_id] = true;
484 
485  // rhs [] on purpose
486  auto & proc_data = data_to_send[proc_id];
487 
488  proc_data.first.push_back(dof_id);
489 
490  // Note this invalidates the data in nonlocal_pattern
491  proc_data.second.emplace_back(std::move(row));
492 
493  // Might as well remove it since it's invalidated anyway
494  it = nonlocal_pattern.erase(it);
495  }
496 
497  // Tell everyone about where everyone will send to
498  comm.alltoall(will_send_to);
499 
500  // will_send_to now represents who we'll receive from
501  // give it a good name
502  auto & will_receive_from = will_send_to;
503 
504  std::vector<Parallel::Request> dof_sends(num_sends);
505  std::vector<Parallel::Request> row_sends(num_sends);
506 
507  // Post all of the sends
508  processor_id_type current_send = 0;
509  for (auto & proc_data : data_to_send)
510  {
511  auto proc_id = proc_data.first;
512 
513  auto & dofs = proc_data.second.first;
514  auto & rows = proc_data.second.second;
515 
516  comm.send(proc_id, dofs, dof_sends[current_send], dof_tag);
517  comm.send(proc_id, rows, row_sends[current_send], row_tag);
518 
519  current_send++;
520  }
521 
522  // Figure out how many receives we're going to have and make a
523  // quick list of who's sending
524  processor_id_type num_receives = 0;
525  std::vector<processor_id_type> receiving_from;
526  for (processor_id_type proc_id = 0; proc_id < num_procs; proc_id++)
527  {
528  auto will = will_receive_from[proc_id];
529 
530  if (will)
531  {
532  num_receives++;
533  receiving_from.push_back(proc_id);
534  }
535  }
536 
537  // Post the receives and handle the data
538  for (auto proc_id : receiving_from)
539  {
540  std::vector<dof_id_type> in_dofs;
541  std::vector<Row> in_rows;
542 
543  // Receive dofs
544  comm.receive(proc_id, in_dofs, dof_tag);
545  comm.receive(proc_id, in_rows, row_tag);
546 
547  const std::size_t n_rows = in_dofs.size();
548  for (std::size_t i = 0; i != n_rows; ++i)
549  {
550  const auto r = in_dofs[i];
551  const auto my_r = r - local_first_dof;
552 
553  auto & their_row = in_rows[i];
554 
556  {
557  auto & my_row = sparsity_pattern[my_r];
558 
559  // They wouldn't have sent an empty row
560  libmesh_assert(!their_row.empty());
561 
562  // We can end up with an empty row on a dof that touches our
563  // inactive elements but not our active ones
564  if (my_row.empty())
565  {
566  my_row.assign (their_row.begin(), their_row.end());
567  }
568  else
569  {
570  my_row.insert (my_row.end(),
571  their_row.begin(),
572  their_row.end());
573 
574  // We cannot use SparsityPattern::sort_row() here because it expects
575  // the [begin,middle) [middle,end) to be non-overlapping. This is not
576  // necessarily the case here, so use std::sort()
577  std::sort (my_row.begin(), my_row.end());
578 
579  my_row.erase(std::unique (my_row.begin(), my_row.end()), my_row.end());
580  }
581 
582  // fix the number of on and off-processor nonzeros in this row
583  n_nz[my_r] = n_oz[my_r] = 0;
584 
585  for (const auto & df : my_row)
586  if ((df < local_first_dof) || (df >= local_end_dof))
587  n_oz[my_r]++;
588  else
589  n_nz[my_r]++;
590  }
591  else
592  {
593  for (const auto & df : their_row)
594  if ((df < local_first_dof) || (df >= local_end_dof))
595  n_oz[my_r]++;
596  else
597  n_nz[my_r]++;
598 
599  n_nz[my_r] = std::min(n_nz[my_r], n_dofs_on_proc);
600  n_oz[my_r] = std::min(n_oz[my_r],
601  static_cast<dof_id_type>(n_global_dofs-n_nz[my_r]));
602  }
603  }
604  }
605 
606  // We should have sent everything at this point.
607  libmesh_assert (nonlocal_pattern.empty());
608 
609  // Make sure to cleanup requests
610  Parallel::wait(dof_sends);
611  Parallel::wait(row_sends);
612 }
613 
614 
615 } // namespace SparsityPattern
616 } // namespace libMesh
void find_connected_dofs(std::vector< dof_id_type > &elem_dofs) const
Definition: dof_map.C:2633
SparsityPattern::Graph sparsity_pattern
void send(const unsigned int dest_processor_id, const T &buf, const MessageTag &tag=no_tag) const
void wait(std::vector< Request > &r)
Definition: request.C:213
std::set< GhostingFunctor * >::const_iterator coupling_functors_begin() const
Definition: dof_map.h:317
dof_id_type n_dofs_on_processor(const processor_id_type proc) const
Definition: dof_map.h:590
void dof_indices(const Elem *const elem, std::vector< dof_id_type > &di) const
Definition: dof_map.C:1930
processor_id_type size() const
Definition: communicator.h:175
The base class for all geometric element types.
Definition: elem.h:100
MeshBase & mesh
uint8_t processor_id_type
Definition: id_types.h:99
unsigned int n_variables() const
Definition: dof_map.h:541
void alltoall(std::vector< T, A > &r) const
const Parallel::Communicator & comm() const
MessageTag get_unique_tag(int tagvalue) const
Definition: communicator.C:201
Utility class for defining generic ranges for threading.
Definition: stored_range.h:52
dof_id_type dof_id
Definition: xdr_io.C:49
Base class for Mesh.
Definition: mesh_base.h:77
Manages the degrees of freedom (DOFs) in a simulation.
Definition: dof_map.h:176
std::vector< dof_id_type, Threads::scalable_allocator< dof_id_type > > Row
void join(const Build &other)
dof_id_type n_dofs() const
Definition: dof_map.h:574
processor_id_type rank() const
Definition: communicator.h:173
static const processor_id_type invalid_processor_id
Definition: dof_object.h:358
void sorted_connected_dofs(const Elem *elem, std::vector< dof_id_type > &dofs_vi, unsigned int vi)
std::unordered_map< const Elem *, const CouplingMatrix * > map_type
std::vector< dof_id_type > n_nz
An object whose state is distributed along a set of processors.
unsigned int size(const data_type &type) const
Definition: status.h:152
SparsityPattern::NonlocalGraph nonlocal_pattern
Status receive(const unsigned int dest_processor_id, T &buf, const MessageTag &tag=any_tag) const
Build(const MeshBase &mesh_in, const DofMap &dof_map_in, const CouplingMatrix *dof_coupling_in, std::set< GhostingFunctor *> coupling_functors_in, const bool implicit_neighbor_dofs_in, const bool need_full_sparsity_pattern_in)
std::set< GhostingFunctor * >::const_iterator coupling_functors_end() const
Definition: dof_map.h:323
void handle_vi_vj(const std::vector< dof_id_type > &element_dofs_i, const std::vector< dof_id_type > &element_dofs_j)
void operator()(const ConstElemRange &range)
dof_id_type first_dof(const processor_id_type proc) const
Definition: dof_map.h:599
dof_id_type end_dof(const processor_id_type proc) const
Definition: dof_map.h:641
processor_id_type processor_id() const
unsigned int size() const
long double min(long double a, double b)
static void merge_ghost_functor_outputs(GhostingFunctor::map_type &elements_to_ghost, std::set< CouplingMatrix *> &temporary_coupling_matrices, const std::set< GhostingFunctor *>::iterator &gf_begin, const std::set< GhostingFunctor *>::iterator &gf_end, const MeshBase::const_element_iterator &elems_begin, const MeshBase::const_element_iterator &elems_end, processor_id_type p)
Definition: dof_map.C:1432
unsigned int idx(const ElemType type, const unsigned int nx, const unsigned int i, const unsigned int j)
uint8_t dof_id_type
Definition: id_types.h:64
Defines the coupling between variables of a System.
static void sort_row(const BidirectionalIterator begin, BidirectionalIterator middle, const BidirectionalIterator end)
std::vector< dof_id_type > n_oz