parallel_implementation.h
Go to the documentation of this file.
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2018 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 #ifndef LIBMESH_PARALLEL_IMPLEMENTATION_H
20 #define LIBMESH_PARALLEL_IMPLEMENTATION_H
21 
22 // Local includes
23 #include "parallel.h"
24 #include "libmesh_logging.h"
25 
26 // C++ includes
27 #include <complex>
28 #include <cstddef>
29 #include <cstring> // memcpy
30 #include <iterator>
31 #include <limits>
32 #include <map>
33 #include <memory>
34 #include <set>
35 #include <string>
36 #include <utility>
37 #include <vector>
38 #include <type_traits>
39 
40 namespace libMesh {
41 namespace Parallel {
42 
43 #ifdef LIBMESH_HAVE_MPI
44 
49 template <typename T>
51 
52 #endif // LIBMESH_HAVE_MPI
53 
57 template <typename T>
59 {
60 public:
61  T val;
62  int rank;
63 };
64 
65 } // namespace Parallel
66 
67 } // namespace libMesh
68 
69 
70 // Anonymous namespace for helper functions
71 namespace {
72 
73 // Internal helper function to create vector<something_usable> from
74 // vector<bool> for compatibility with MPI bitwise operations
75 template <typename T, typename A1, typename A2>
76 inline void pack_vector_bool(const std::vector<bool,A1> & vec_in,
77  std::vector<T,A2> & vec_out)
78 {
79  unsigned int data_bits = 8*sizeof(T);
80  std::size_t in_size = vec_in.size();
81  std::size_t out_size = in_size/data_bits + ((in_size%data_bits)?1:0);
82  vec_out.clear();
83  vec_out.resize(out_size);
84  for (std::size_t i=0; i != in_size; ++i)
85  {
86  std::size_t index = i/data_bits;
87  std::size_t offset = i%data_bits;
88  vec_out[index] += (vec_in[i]?1:0) << offset;
89  }
90 }
91 
92 // Internal helper function to create vector<bool> from
93 // vector<something usable> for compatibility with MPI byte
94 // operations
95 template <typename T, typename A1, typename A2>
96 inline void unpack_vector_bool(const std::vector<T,A1> & vec_in,
97  std::vector<bool,A2> & vec_out)
98 {
99  unsigned int data_bits = 8*sizeof(T);
100  // We need the output vector to already be properly sized
101  std::size_t out_size = vec_out.size();
102  libmesh_assert_equal_to
103  (out_size/data_bits + (out_size%data_bits?1:0), vec_in.size());
104 
105  for (std::size_t i=0; i != out_size; ++i)
106  {
107  std::size_t index = i/data_bits;
108  std::size_t offset = i%data_bits;
109  vec_out[i] = vec_in[index] << (data_bits-1-offset) >> (data_bits-1);
110  }
111 }
112 
113 
114 #ifdef LIBMESH_HAVE_MPI
115 // We use a helper function here to avoid ambiguity when calling
116 // send_receive of (vector<vector<T>>,vector<vector<T>>)
117 template <typename T1, typename T2, typename A1, typename A2, typename A3, typename A4>
118 inline void send_receive_vec_of_vec(const unsigned int dest_processor_id,
119  const std::vector<std::vector<T1,A1>,A2> & send,
120  const unsigned int source_processor_id,
121  std::vector<std::vector<T2,A3>,A4> & recv,
122  const libMesh::Parallel::MessageTag & send_tag,
123  const libMesh::Parallel::MessageTag & recv_tag,
124  const libMesh::Parallel::Communicator & comm)
125 {
126  LOG_SCOPE("send_receive()", "Parallel");
127 
128  if (dest_processor_id == comm.rank() &&
129  source_processor_id == comm.rank())
130  {
131  recv = send;
132  return;
133  }
134 
136  comm.send (dest_processor_id, send, request, send_tag);
137  comm.receive (source_processor_id, recv, recv_tag);
138  request.wait();
139 }
140 
141 #endif // LIBMESH_HAVE_MPI
142 
143 } // Anonymous namespace
144 
145 
146 
147 namespace libMesh
148 {
149 
150 namespace Parallel
151 {
152 
153 #ifdef LIBMESH_HAVE_MPI
154 template<>
155 inline data_type dataplusint_type<short int>() { return MPI_SHORT_INT; }
156 
157 template<>
158 inline data_type dataplusint_type<int>() { return MPI_2INT; }
159 
160 template<>
161 inline data_type dataplusint_type<long>() { return MPI_LONG_INT; }
162 
163 template<>
164 inline data_type dataplusint_type<float>() { return MPI_FLOAT_INT; }
165 
166 template<>
167 inline data_type dataplusint_type<double>() { return MPI_DOUBLE_INT; }
168 
169 template<>
170 inline data_type dataplusint_type<long double>() { return MPI_LONG_DOUBLE_INT; }
171 
172 
173 
174 inline status Communicator::probe (const unsigned int src_processor_id,
175  const MessageTag & tag) const
176 {
177  LOG_SCOPE("probe()", "Parallel");
178 
179  status stat;
180 
181  libmesh_call_mpi
182  (MPI_Probe (src_processor_id, tag.value(), this->get(), &stat));
183 
184  return stat;
185 }
186 
187 template<typename T>
188 inline Status Communicator::packed_range_probe (const unsigned int src_processor_id,
189  const MessageTag & tag,
190  bool & flag) const
191 {
192  LOG_SCOPE("packed_range_probe()", "Parallel");
193 
194  libmesh_experimental();
195 
196  Status stat((StandardType<typename Packing<T>::buffer_type>()));
197 
198  int int_flag;
199 
200  libmesh_call_mpi(MPI_Iprobe(src_processor_id,
201  tag.value(),
202  this->get(),
203  &int_flag,
204  stat.get()));
205 
206  flag = int_flag;
207 
208  return stat;
209 }
210 
211 
212 template<typename T>
213 inline void Communicator::send (const unsigned int dest_processor_id,
214  const std::basic_string<T> & buf,
215  const MessageTag & tag) const
216 {
217  LOG_SCOPE("send()", "Parallel");
218 
219  T * dataptr = buf.empty() ? nullptr : const_cast<T *>(buf.data());
220 
221  libmesh_call_mpi
222  (((this->send_mode() == SYNCHRONOUS) ?
223  MPI_Ssend : MPI_Send) (dataptr,
224  cast_int<int>(buf.size()),
225  StandardType<T>(dataptr),
226  dest_processor_id,
227  tag.value(),
228  this->get()));
229 }
230 
231 
232 
233 template <typename T>
234 inline void Communicator::send (const unsigned int dest_processor_id,
235  const std::basic_string<T> & buf,
236  Request & req,
237  const MessageTag & tag) const
238 {
239  LOG_SCOPE("send()", "Parallel");
240 
241  T * dataptr = buf.empty() ? nullptr : const_cast<T *>(buf.data());
242 
243  libmesh_call_mpi
244  (((this->send_mode() == SYNCHRONOUS) ?
245  MPI_Issend : MPI_Isend) (dataptr,
246  cast_int<int>(buf.size()),
247  StandardType<T>(dataptr),
248  dest_processor_id,
249  tag.value(),
250  this->get(),
251  req.get()));
252 
253  // The MessageTag should stay registered for the Request lifetime
256 }
257 
258 
259 
260 template <typename T>
261 inline void Communicator::send (const unsigned int dest_processor_id,
262  const T & buf,
263  const MessageTag & tag) const
264 {
265  LOG_SCOPE("send()", "Parallel");
266 
267  T * dataptr = const_cast<T*> (&buf);
268 
269  libmesh_call_mpi
270  (((this->send_mode() == SYNCHRONOUS) ?
271  MPI_Ssend : MPI_Send) (dataptr,
272  1,
273  StandardType<T>(dataptr),
274  dest_processor_id,
275  tag.value(),
276  this->get()));
277 }
278 
279 
280 
281 template <typename T>
282 inline void Communicator::send (const unsigned int dest_processor_id,
283  const T & buf,
284  Request & req,
285  const MessageTag & tag) const
286 {
287  LOG_SCOPE("send()", "Parallel");
288 
289  T * dataptr = const_cast<T*>(&buf);
290 
291  libmesh_call_mpi
292  (((this->send_mode() == SYNCHRONOUS) ?
293  MPI_Issend : MPI_Isend) (dataptr,
294  1,
295  StandardType<T>(dataptr),
296  dest_processor_id,
297  tag.value(),
298  this->get(),
299  req.get()));
300 
301  // The MessageTag should stay registered for the Request lifetime
304 }
305 
306 
307 
308 template <typename T, typename C, typename A>
309 inline void Communicator::send (const unsigned int dest_processor_id,
310  const std::set<T,C,A> & buf,
311  const MessageTag & tag) const
312 {
313  this->send(dest_processor_id, buf,
314  StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), tag);
315 }
316 
317 
318 
319 template <typename T, typename C, typename A>
320 inline void Communicator::send (const unsigned int dest_processor_id,
321  const std::set<T,C,A> & buf,
322  Request & req,
323  const MessageTag & tag) const
324 {
325  this->send(dest_processor_id, buf,
326  StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), req, tag);
327 }
328 
329 
330 
331 template <typename T, typename C, typename A>
332 inline void Communicator::send (const unsigned int dest_processor_id,
333  const std::set<T,C,A> & buf,
334  const DataType & type,
335  const MessageTag & tag) const
336 {
337  LOG_SCOPE("send()", "Parallel");
338 
339  std::vector<T> vecbuf(buf.begin(), buf.end());
340  this->send(dest_processor_id, vecbuf, type, tag);
341 }
342 
343 
344 
345 template <typename T, typename C, typename A>
346 inline void Communicator::send (const unsigned int dest_processor_id,
347  const std::set<T,C,A> & buf,
348  const DataType & type,
349  Request & req,
350  const MessageTag & tag) const
351 {
352  LOG_SCOPE("send()", "Parallel");
353 
354  // Allocate temporary buffer on the heap so it lives until after
355  // the non-blocking send completes
356  std::vector<T> * vecbuf =
357  new std::vector<T,A>(buf.begin(), buf.end());
358 
359  // Make the Request::wait() handle deleting the buffer
360  req.add_post_wait_work
361  (new Parallel::PostWaitDeleteBuffer<std::vector<T,A>>(vecbuf));
362 
363  this->send(dest_processor_id, *vecbuf, type, req, tag);
364 }
365 
366 
367 
368 template <typename T, typename A>
369 inline void Communicator::send (const unsigned int dest_processor_id,
370  const std::vector<T,A> & buf,
371  const MessageTag & tag) const
372 {
373  this->send(dest_processor_id, buf,
374  StandardType<T>(buf.empty() ? nullptr : &buf.front()), tag);
375 }
376 
377 
378 
379 template <typename T, typename A>
380 inline void Communicator::send (const unsigned int dest_processor_id,
381  const std::vector<T,A> & buf,
382  Request & req,
383  const MessageTag & tag) const
384 {
385  this->send(dest_processor_id, buf,
386  StandardType<T>(buf.empty() ? nullptr : &buf.front()), req, tag);
387 }
388 
389 
390 
391 template <typename T, typename A>
392 inline void Communicator::send (const unsigned int dest_processor_id,
393  const std::vector<T,A> & buf,
394  const DataType & type,
395  const MessageTag & tag) const
396 {
397  LOG_SCOPE("send()", "Parallel");
398 
399  libmesh_call_mpi
400  (((this->send_mode() == SYNCHRONOUS) ?
401  MPI_Ssend : MPI_Send) (buf.empty() ? nullptr : const_cast<T*>(buf.data()),
402  cast_int<int>(buf.size()),
403  type,
404  dest_processor_id,
405  tag.value(),
406  this->get()));
407 }
408 
409 
410 
411 template <typename T, typename A>
412 inline void Communicator::send (const unsigned int dest_processor_id,
413  const std::vector<T,A> & buf,
414  const DataType & type,
415  Request & req,
416  const MessageTag & tag) const
417 {
418  LOG_SCOPE("send()", "Parallel");
419 
420  libmesh_call_mpi
421  (((this->send_mode() == SYNCHRONOUS) ?
422  MPI_Issend : MPI_Isend) (buf.empty() ? nullptr : const_cast<T*>(buf.data()),
423  cast_int<int>(buf.size()),
424  type,
425  dest_processor_id,
426  tag.value(),
427  this->get(),
428  req.get()));
429 
430  // The MessageTag should stay registered for the Request lifetime
431  req.add_post_wait_work
432  (new Parallel::PostWaitDereferenceTag(tag));
433 }
434 
435 
436 
437 template <typename T, typename A1, typename A2>
438 inline void Communicator::send (const unsigned int dest_processor_id,
439  const std::vector<std::vector<T,A1>,A2> & buf,
440  const MessageTag & tag) const
441 {
442  this->send(dest_processor_id, buf,
443  StandardType<T>((buf.empty() || buf.front().empty()) ?
444  nullptr : &(buf.front().front())), tag);
445 }
446 
447 
448 
449 template <typename T, typename A1, typename A2>
450 inline void Communicator::send (const unsigned int dest_processor_id,
451  const std::vector<std::vector<T,A1>,A2> & buf,
452  Request & req,
453  const MessageTag & tag) const
454 {
455  this->send(dest_processor_id, buf,
456  StandardType<T>((buf.empty() || buf.front().empty()) ?
457  nullptr : &(buf.front().front())), req, tag);
458 }
459 
460 
461 
462 template <typename T, typename A1, typename A2>
463 inline void Communicator::send (const unsigned int dest_processor_id,
464  const std::vector<std::vector<T,A1>,A2> & buf,
465  const DataType & type,
466  const MessageTag & tag) const
467 {
468  // We'll avoid redundant code (at the cost of using heap rather
469  // than stack buffer allocation) by reusing the non-blocking send
470  Request req;
471  this->send(dest_processor_id, buf, type, req, tag);
472  req.wait();
473 }
474 
475 
476 
477 template <typename T, typename A1, typename A2>
478 inline void Communicator::send (const unsigned int dest_processor_id,
479  const std::vector<std::vector<T,A1>,A2> & send_vecs,
480  const DataType & type,
481  Request & req,
482  const MessageTag & tag) const
483 {
484  // temporary buffer - this will be sized in bytes
485  // and manipulated with MPI_Pack
486  std::vector<char> * sendbuf = new std::vector<char>();
487 
488  // figure out how many bytes we need to pack all the data
489  int packedsize=0;
490 
491  // The outer buffer size
492  libmesh_call_mpi
493  (MPI_Pack_size (1,
495  this->get(),
496  &packedsize));
497 
498  int sendsize = packedsize;
499 
500  const std::size_t n_vecs = send_vecs.size();
501 
502  for (std::size_t i = 0; i != n_vecs; ++i)
503  {
504  // The size of the ith inner buffer
505  libmesh_call_mpi
506  (MPI_Pack_size (1,
508  this->get(),
509  &packedsize));
510 
511  sendsize += packedsize;
512 
513  // The data for each inner buffer
514  libmesh_call_mpi
515  (MPI_Pack_size (libMesh::cast_int<int>(send_vecs[i].size()), type,
516  this->get(), &packedsize));
517 
518  sendsize += packedsize;
519  }
520 
521  libmesh_assert (sendsize /* should at least be 1! */);
522  sendbuf->resize (sendsize);
523 
524  // Pack the send buffer
525  int pos=0;
526 
527  // ... the size of the outer buffer
528  const int mpi_n_vecs = libMesh::cast_int<int>(n_vecs);
529 
530  libmesh_call_mpi
531  (MPI_Pack (&mpi_n_vecs, 1,
533  sendbuf->data(), sendsize, &pos, this->get()));
534 
535  for (std::size_t i = 0; i != n_vecs; ++i)
536  {
537  // ... the size of the ith inner buffer
538  const int subvec_size = libMesh::cast_int<int>(send_vecs[i].size());
539 
540  libmesh_call_mpi
541  (MPI_Pack (&subvec_size, 1, libMesh::Parallel::StandardType<unsigned int>(),
542  sendbuf->data(), sendsize, &pos, this->get()));
543 
544  // ... the contents of the ith inner buffer
545  if (!send_vecs[i].empty())
546  libmesh_call_mpi
547  (MPI_Pack (const_cast<T*>(send_vecs[i].data()),
548  libMesh::cast_int<int>(subvec_size), type,
549  sendbuf->data(), sendsize, &pos, this->get()));
550  }
551 
552  libmesh_assert_equal_to (pos, sendsize);
553 
554  req.add_post_wait_work
555  (new Parallel::PostWaitDeleteBuffer<std::vector<char>> (sendbuf));
556 
557  this->send (dest_processor_id, *sendbuf, MPI_PACKED, req, tag);
558 }
559 
560 
561 template <typename Context, typename Iter>
562 inline void Communicator::send_packed_range (const unsigned int dest_processor_id,
563  const Context * context,
564  Iter range_begin,
565  const Iter range_end,
566  const MessageTag & tag) const
567 {
568  // We will serialize variable size objects from *range_begin to
569  // *range_end as a sequence of plain data (e.g. ints) in this buffer
570  typedef typename std::iterator_traits<Iter>::value_type T;
571 
572  std::size_t total_buffer_size =
573  Parallel::packed_range_size (context, range_begin, range_end);
574 
575  this->send(dest_processor_id, total_buffer_size, tag);
576 
577 #ifdef DEBUG
578  std::size_t used_buffer_size = 0;
579 #endif
580 
581  while (range_begin != range_end)
582  {
583  libmesh_assert_greater (std::distance(range_begin, range_end), 0);
584 
585  std::vector<typename Parallel::Packing<T>::buffer_type> buffer;
586 
587  const Iter next_range_begin = Parallel::pack_range
588  (context, range_begin, range_end, buffer);
589 
590  libmesh_assert_greater (std::distance(range_begin, next_range_begin), 0);
591 
592  range_begin = next_range_begin;
593 
594 #ifdef DEBUG
595  used_buffer_size += buffer.size();
596 #endif
597 
598  // Blocking send of the buffer
599  this->send(dest_processor_id, buffer, tag);
600  }
601 
602 #ifdef DEBUG
603  libmesh_assert_equal_to(used_buffer_size, total_buffer_size);
604 #endif
605 }
606 
607 
608 template <typename Context, typename Iter>
609 inline void Communicator::send_packed_range (const unsigned int dest_processor_id,
610  const Context * context,
611  Iter range_begin,
612  const Iter range_end,
613  Request & req,
614  const MessageTag & tag) const
615 {
616  // Allocate a buffer on the heap so we don't have to free it until
617  // after the Request::wait()
618  typedef typename std::iterator_traits<Iter>::value_type T;
619  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
620 
621  std::size_t total_buffer_size =
622  Parallel::packed_range_size (context, range_begin, range_end);
623 
624  // That local variable will be gone soon; we need a send buffer that
625  // will stick around. I heard you like buffering so I put a buffer
626  // for your buffer size so you can buffer the size of your buffer.
627  std::size_t * total_buffer_size_buffer = new std::size_t;
628  *total_buffer_size_buffer = total_buffer_size;
629 
630  // Delete the buffer size's buffer when we're done
631  Request intermediate_req = request();
632  intermediate_req.add_post_wait_work
633  (new Parallel::PostWaitDeleteBuffer<std::size_t>(total_buffer_size_buffer));
634  this->send(dest_processor_id, *total_buffer_size_buffer, intermediate_req, tag);
635 
636  // And don't finish up the full request until we're done with its
637  // dependencies
638  req.add_prior_request(intermediate_req);
639 
640 #ifdef DEBUG
641  std::size_t used_buffer_size = 0;
642 #endif
643 
644  while (range_begin != range_end)
645  {
646  libmesh_assert_greater (std::distance(range_begin, range_end), 0);
647 
648  std::vector<buffer_t> * buffer = new std::vector<buffer_t>();
649 
650  const Iter next_range_begin =
651  Parallel::pack_range(context, range_begin, range_end,
652  *buffer);
653 
654  libmesh_assert_greater (std::distance(range_begin, next_range_begin), 0);
655 
656  range_begin = next_range_begin;
657 
658 #ifdef DEBUG
659  used_buffer_size += buffer->size();
660 #endif
661 
662  Request next_intermediate_req;
663 
664  Request * my_req = (range_begin == range_end) ? &req : &next_intermediate_req;
665 
666  // Make the Request::wait() handle deleting the buffer
667  my_req->add_post_wait_work
668  (new Parallel::PostWaitDeleteBuffer<std::vector<buffer_t>>
669  (buffer));
670 
671  // Non-blocking send of the buffer
672  this->send(dest_processor_id, *buffer, *my_req, tag);
673 
674  if (range_begin != range_end)
675  req.add_prior_request(*my_req);
676  }
677 }
678 
679 
680 
681 
682 
683 
684 
685 template <typename Context, typename Iter>
686 inline void Communicator::nonblocking_send_packed_range (const unsigned int dest_processor_id,
687  const Context * context,
688  Iter range_begin,
689  const Iter range_end,
690  Request & req,
691  const MessageTag & tag) const
692 {
693  libmesh_experimental();
694 
695  // Allocate a buffer on the heap so we don't have to free it until
696  // after the Request::wait()
697  typedef typename std::iterator_traits<Iter>::value_type T;
698  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
699 
700  if (range_begin != range_end)
701  {
702  std::vector<buffer_t> * buffer = new std::vector<buffer_t>();
703 
704  range_begin =
705  Parallel::pack_range(context,
706  range_begin,
707  range_end,
708  *buffer,
709  // MPI-2 can only use integers for size
711 
712  if (range_begin != range_end)
713  libmesh_error_msg("Non-blocking packed range sends cannot exceed " << std::numeric_limits<int>::max() << "in size");
714 
715  // Make the Request::wait() handle deleting the buffer
717  (new Parallel::PostWaitDeleteBuffer<std::vector<buffer_t>>
718  (buffer));
719 
720  // Non-blocking send of the buffer
721  this->send(dest_processor_id, *buffer, req, tag);
722  }
723 }
724 
725 
726 template <typename T>
727 inline Status Communicator::receive (const unsigned int src_processor_id,
728  std::basic_string<T> & buf,
729  const MessageTag & tag) const
730 {
731  std::vector<T> tempbuf; // Officially C++ won't let us get a
732  // modifiable array from a string
733 
734  Status stat = this->receive(src_processor_id, tempbuf, tag);
735  buf.assign(tempbuf.begin(), tempbuf.end());
736  return stat;
737 }
738 
739 
740 
741 template <typename T>
742 inline void Communicator::receive (const unsigned int src_processor_id,
743  std::basic_string<T> & buf,
744  Request & req,
745  const MessageTag & tag) const
746 {
747  // Officially C++ won't let us get a modifiable array from a
748  // string, and we can't even put one on the stack for the
749  // non-blocking case.
750  std::vector<T> * tempbuf = new std::vector<T>();
751 
752  // We can clear the string, but the Request::wait() will need to
753  // handle copying our temporary buffer to it
754  buf.clear();
755 
757  (new Parallel::PostWaitCopyBuffer<std::vector<T>,
758  std::back_insert_iterator<std::basic_string<T>>>
759  (tempbuf, std::back_inserter(buf)));
760 
761  // Make the Request::wait() then handle deleting the buffer
763  (new Parallel::PostWaitDeleteBuffer<std::vector<T>>(tempbuf));
764 
765  this->receive(src_processor_id, tempbuf, req, tag);
766 }
767 
768 
769 
770 template <typename T>
771 inline Status Communicator::receive (const unsigned int src_processor_id,
772  T & buf,
773  const MessageTag & tag) const
774 {
775  LOG_SCOPE("receive()", "Parallel");
776 
777  // Get the status of the message, explicitly provide the
778  // datatype so we can later query the size
779  Status stat(this->probe(src_processor_id, tag), StandardType<T>(&buf));
780 
781  libmesh_call_mpi
782  (MPI_Recv (&buf, 1, StandardType<T>(&buf), src_processor_id,
783  tag.value(), this->get(), stat.get()));
784 
785  return stat;
786 }
787 
788 
789 
790 template <typename T>
791 inline void Communicator::receive (const unsigned int src_processor_id,
792  T & buf,
793  Request & req,
794  const MessageTag & tag) const
795 {
796  LOG_SCOPE("receive()", "Parallel");
797 
798  libmesh_call_mpi
799  (MPI_Irecv (&buf, 1, StandardType<T>(&buf), src_processor_id,
800  tag.value(), this->get(), req.get()));
801 
802  // The MessageTag should stay registered for the Request lifetime
805 }
806 
807 
808 
809 template <typename T, typename C, typename A>
810 inline Status Communicator::receive (const unsigned int src_processor_id,
811  std::set<T,C,A> & buf,
812  const MessageTag & tag) const
813 {
814  return this->receive
815  (src_processor_id, buf,
816  StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), tag);
817 }
818 
819 
820 
821 /*
822  * No non-blocking receives of std::set until we figure out how to
823  * resize the temporary buffer
824  */
825 #if 0
826 template <typename T, typename C, typename A>
827 inline void Communicator::receive (const unsigned int src_processor_id,
828  std::set<T,C,A> & buf,
829  Request & req,
830  const MessageTag & tag) const
831 {
832  this->receive (src_processor_id, buf,
833  StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), req, tag);
834 }
835 #endif // 0
836 
837 
838 
839 template <typename T, typename C, typename A>
840 inline Status Communicator::receive (const unsigned int src_processor_id,
841  std::set<T,C,A> & buf,
842  const DataType & type,
843  const MessageTag & tag) const
844 {
845  LOG_SCOPE("receive()", "Parallel");
846 
847  std::vector<T> vecbuf;
848  Status stat = this->receive(src_processor_id, vecbuf, type, tag);
849  buf.clear();
850  buf.insert(vecbuf.begin(), vecbuf.end());
851 
852  return stat;
853 }
854 
855 
856 
857 /*
858  * No non-blocking receives of std::set until we figure out how to
859  * resize the temporary buffer
860  */
861 #if 0
862 template <typename T, typename C, typename A>
863 inline void Communicator::receive (const unsigned int src_processor_id,
864  std::set<T,C,A> & buf,
865  const DataType & type,
866  Request & req,
867  const MessageTag & tag) const
868 {
869  LOG_SCOPE("receive()", "Parallel");
870 
871  // Allocate temporary buffer on the heap so it lives until after
872  // the non-blocking send completes
873  std::vector<T> * vecbuf = new std::vector<T>();
874 
875  // We can clear the set, but the Request::wait() will need to
876  // handle copying our temporary buffer to it
877  buf.clear();
878 
879  req.add_post_wait_work
880  (new Parallel::PostWaitCopyBuffer<std::vector<T>,
881  std::insert_iterator<std::set<T,C,A>>>
882  (*vecbuf, std::inserter(buf,buf.end())));
883 
884  // Make the Request::wait() then handle deleting the buffer
885  req.add_post_wait_work
886  (new Parallel::PostWaitDeleteBuffer<std::vector<T>>(vecbuf));
887 
888  this->receive(src_processor_id, *vecbuf, type, req, tag);
889 }
890 #endif // 0
891 
892 
893 
894 template <typename T, typename A>
895 inline Status Communicator::receive (const unsigned int src_processor_id,
896  std::vector<T,A> & buf,
897  const MessageTag & tag) const
898 {
899  return this->receive
900  (src_processor_id, buf,
901  StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), tag);
902 }
903 
904 
905 
906 template <typename T, typename A>
907 inline void Communicator::receive (const unsigned int src_processor_id,
908  std::vector<T,A> & buf,
909  Request & req,
910  const MessageTag & tag) const
911 {
912  this->receive (src_processor_id, buf,
913  StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), req, tag);
914 }
915 
916 
917 
918 template <typename T, typename A>
919 inline Status Communicator::receive (const unsigned int src_processor_id,
920  std::vector<T,A> & buf,
921  const DataType & type,
922  const MessageTag & tag) const
923 {
924  LOG_SCOPE("receive()", "Parallel");
925 
926  // Get the status of the message, explicitly provide the
927  // datatype so we can later query the size
928  Status stat(this->probe(src_processor_id, tag), type);
929 
930  buf.resize(stat.size());
931 
932  // Use stat.source() and stat.tag() in the receive - if
933  // src_processor_id is or tag is "any" then we want to be sure we
934  // try to receive the same message we just probed.
935  libmesh_call_mpi
936  (MPI_Recv (buf.empty() ? nullptr : buf.data(),
937  cast_int<int>(buf.size()), type, stat.source(),
938  stat.tag(), this->get(), stat.get()));
939 
940  libmesh_assert_equal_to (stat.size(), buf.size());
941 
942  return stat;
943 }
944 
945 
946 
947 template <typename T, typename A>
948 inline void Communicator::receive (const unsigned int src_processor_id,
949  std::vector<T,A> & buf,
950  const DataType & type,
951  Request & req,
952  const MessageTag & tag) const
953 {
954  LOG_SCOPE("receive()", "Parallel");
955 
956  libmesh_call_mpi
957  (MPI_Irecv (buf.empty() ? nullptr : buf.data(),
958  cast_int<int>(buf.size()), type, src_processor_id,
959  tag.value(), this->get(), req.get()));
960 
961  // The MessageTag should stay registered for the Request lifetime
962  req.add_post_wait_work
963  (new Parallel::PostWaitDereferenceTag(tag));
964 }
965 
966 
967 
968 template <typename T, typename A1, typename A2>
969 inline Status Communicator::receive (const unsigned int src_processor_id,
970  std::vector<std::vector<T,A1>,A2> & buf,
971  const MessageTag & tag) const
972 {
973  return this->receive
974  (src_processor_id, buf,
975  StandardType<T>((buf.empty() || buf.front().empty()) ?
976  nullptr : &(buf.front().front())), tag);
977 }
978 
979 
980 
981 template <typename T, typename A1, typename A2>
982 inline void Communicator::receive (const unsigned int src_processor_id,
983  std::vector<std::vector<T,A1>,A2> & buf,
984  Request & req,
985  const MessageTag & tag) const
986 {
987  this->receive (src_processor_id, buf,
988  StandardType<T>((buf.empty() || buf.front().empty()) ?
989  nullptr : &(buf.front().front())), req, tag);
990 }
991 
992 
993 
994 template <typename T, typename A1, typename A2>
995 inline Status Communicator::receive (const unsigned int src_processor_id,
996  std::vector<std::vector<T,A1>,A2> & recv,
997  const DataType & type,
998  const MessageTag & tag) const
999 {
1000  // temporary buffer - this will be sized in bytes
1001  // and manipulated with MPI_Unpack
1002  std::vector<char> recvbuf;
1003 
1004  Status stat = this->receive (src_processor_id, recvbuf, MPI_PACKED, tag);
1005 
1006  // We should at least have one header datum, for outer vector size
1007  libmesh_assert (!recvbuf.empty());
1008 
1009  // Unpack the received buffer
1010  int bufsize = libMesh::cast_int<int>(recvbuf.size());
1011  int recvsize, pos=0;
1012  libmesh_call_mpi
1013  (MPI_Unpack (recvbuf.data(), bufsize, &pos,
1015  this->get()));
1016 
1017  // ... size the outer buffer
1018  recv.resize (recvsize);
1019 
1020  const std::size_t n_vecs = recvsize;
1021  for (std::size_t i = 0; i != n_vecs; ++i)
1022  {
1023  int subvec_size;
1024 
1025  libmesh_call_mpi
1026  (MPI_Unpack (recvbuf.data(), bufsize, &pos,
1027  &subvec_size, 1,
1029  this->get()));
1030 
1031  // ... size the inner buffer
1032  recv[i].resize (subvec_size);
1033 
1034  // ... unpack the inner buffer if it is not empty
1035  if (!recv[i].empty())
1036  libmesh_call_mpi
1037  (MPI_Unpack (recvbuf.data(), bufsize, &pos, recv[i].data(),
1038  subvec_size, type, this->get()));
1039  }
1040 
1041  return stat;
1042 }
1043 
1044 
1045 
1046 // FIXME - non-blocking receive of vector-of-vectors is currently unimplemented
1047 /*
1048 template <typename T, typename A1, typename A2>
1049 inline void Communicator::receive (const unsigned int src_processor_id,
1050  std::vector<std::vector<T,A1>,A2> & buf,
1051  const DataType & type,
1052  Request & req,
1053  const MessageTag & tag) const
1054 {
1055 }
1056 */
1057 
1058 
1059 template <typename Context, typename OutputIter, typename T>
1060 inline void Communicator::receive_packed_range (const unsigned int src_processor_id,
1061  Context * context,
1062  OutputIter out_iter,
1063  const T * output_type,
1064  const MessageTag & tag) const
1065 {
1066  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
1067 
1068  // Receive serialized variable size objects as sequences of buffer_t
1069  std::size_t total_buffer_size = 0;
1070  Status stat = this->receive(src_processor_id, total_buffer_size, tag);
1071 
1072  // Use stat.source() and stat.tag() in subsequent receives - if
1073  // src_processor_id is or tag is "any" then we want to be sure we
1074  // try to receive messages all corresponding to the same send.
1075 
1076  std::size_t received_buffer_size = 0;
1077  while (received_buffer_size < total_buffer_size)
1078  {
1079  std::vector<buffer_t> buffer;
1080  this->receive(stat.source(), buffer, MessageTag(stat.tag()));
1081  received_buffer_size += buffer.size();
1083  (buffer, context, out_iter, output_type);
1084  }
1085 }
1086 
1087 
1088 
1089 // template <typename Context, typename OutputIter>
1090 // inline void Communicator::receive_packed_range (const unsigned int src_processor_id,
1091 // Context * context,
1092 // OutputIter out_iter,
1093 // Request & req,
1094 // const MessageTag & tag) const
1095 // {
1096 // typedef typename std::iterator_traits<OutputIter>::value_type T;
1097 // typedef typename Parallel::Packing<T>::buffer_type buffer_t;
1098 //
1099 // // Receive serialized variable size objects as a sequence of
1100 // // buffer_t.
1101 // // Allocate a buffer on the heap so we don't have to free it until
1102 // // after the Request::wait()
1103 // std::vector<buffer_t> * buffer = new std::vector<buffer_t>();
1104 // this->receive(src_processor_id, *buffer, req, tag);
1105 //
1106 // // Make the Request::wait() handle unpacking the buffer
1107 // req.add_post_wait_work
1108 // (new Parallel::PostWaitUnpackBuffer<std::vector<buffer_t>, Context, OutputIter>
1109 // (buffer, context, out_iter));
1110 //
1111 // // Make the Request::wait() then handle deleting the buffer
1112 // req.add_post_wait_work
1113 // (new Parallel::PostWaitDeleteBuffer<std::vector<buffer_t>>(buffer));
1114 // }
1115 
1116 template <typename Context, typename OutputIter, typename T>
1117 inline void Communicator::nonblocking_receive_packed_range (const unsigned int src_processor_id,
1118  Context * context,
1119  OutputIter out,
1120  const T * /* output_type */,
1121  Request & req,
1122  Status & stat,
1123  const MessageTag & tag) const
1124 {
1125  libmesh_experimental();
1126 
1127  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
1128 
1129  // Receive serialized variable size objects as a sequence of
1130  // buffer_t.
1131  // Allocate a buffer on the heap so we don't have to free it until
1132  // after the Request::wait()
1133  std::vector<buffer_t> * buffer = new std::vector<buffer_t>(stat.size());
1134  this->receive(src_processor_id, *buffer, req, tag);
1135 
1136  // Make the Request::wait() handle unpacking the buffer
1137  req.add_post_wait_work
1138  (new Parallel::PostWaitUnpackBuffer<std::vector<buffer_t>, Context, OutputIter, T>(*buffer, context, out));
1139 
1140  // Make the Request::wait() then handle deleting the buffer
1141  req.add_post_wait_work
1142  (new Parallel::PostWaitDeleteBuffer<std::vector<buffer_t>>(buffer));
1143 }
1144 
1145 
1146 
1147 template <typename T1, typename T2, typename A1, typename A2>
1148 inline void Communicator::send_receive(const unsigned int dest_processor_id,
1149  const std::vector<T1,A1> & sendvec,
1150  const DataType & type1,
1151  const unsigned int source_processor_id,
1152  std::vector<T2,A2> & recv,
1153  const DataType & type2,
1154  const MessageTag & send_tag,
1155  const MessageTag & recv_tag) const
1156 {
1157  LOG_SCOPE("send_receive()", "Parallel");
1158 
1159  if (dest_processor_id == this->rank() &&
1160  source_processor_id == this->rank())
1161  {
1162  recv = sendvec;
1163  return;
1164  }
1165 
1166  Parallel::Request req;
1167 
1168  this->send (dest_processor_id, sendvec, type1, req, send_tag);
1169 
1170  this->receive (source_processor_id, recv, type2, recv_tag);
1171 
1172  req.wait();
1173 }
1174 
1175 
1176 
1177 template <typename T1, typename T2>
1178 inline void Communicator::send_receive(const unsigned int dest_processor_id,
1179  const T1 & sendvec,
1180  const unsigned int source_processor_id,
1181  T2 & recv,
1182  const MessageTag & send_tag,
1183  const MessageTag & recv_tag) const
1184 {
1185  LOG_SCOPE("send_receive()", "Parallel");
1186 
1187  if (dest_processor_id == this->rank() &&
1188  source_processor_id == this->rank())
1189  {
1190  recv = sendvec;
1191  return;
1192  }
1193 
1194  // MPI_STATUS_IGNORE is from MPI-2; using it with some versions of
1195  // MPICH may cause a crash:
1196  // https://bugzilla.mcs.anl.gov/globus/show_bug.cgi?id=1798
1197  libmesh_call_mpi
1198  (MPI_Sendrecv(const_cast<T1*>(&sendvec), 1, StandardType<T1>(&sendvec),
1199  dest_processor_id, send_tag.value(), &recv, 1,
1200  StandardType<T2>(&recv), source_processor_id,
1201  recv_tag.value(), this->get(), MPI_STATUS_IGNORE));
1202 }
1203 
1204 
1205 
1206 // This is both a declaration and definition for a new overloaded
1207 // function template, so we have to re-specify the default
1208 // arguments.
1209 //
1210 // We specialize on the T1==T2 case so that we can handle
1211 // send_receive-to-self with a plain copy rather than going through
1212 // MPI.
1213 template <typename T, typename A>
1214 inline void Communicator::send_receive(const unsigned int dest_processor_id,
1215  const std::vector<T,A> & sendvec,
1216  const unsigned int source_processor_id,
1217  std::vector<T,A> & recv,
1218  const MessageTag & send_tag,
1219  const MessageTag & recv_tag) const
1220 {
1221  if (dest_processor_id == this->rank() &&
1222  source_processor_id == this->rank())
1223  {
1224  LOG_SCOPE("send_receive()", "Parallel");
1225  recv = sendvec;
1226  return;
1227  }
1228 
1229  const T* example = sendvec.empty() ?
1230  (recv.empty() ? nullptr : recv.data()) : sendvec.data();
1231 
1232  // Call the user-defined type version with automatic
1233  // type conversion based on template argument:
1234  this->send_receive (dest_processor_id, sendvec,
1235  StandardType<T>(example),
1236  source_processor_id, recv,
1237  StandardType<T>(example),
1238  send_tag, recv_tag);
1239 }
1240 
1241 
1242 // This is both a declaration and definition for a new overloaded
1243 // function template, so we have to re-specify the default arguments
1244 template <typename T1, typename T2, typename A1, typename A2>
1245 inline void Communicator::send_receive(const unsigned int dest_processor_id,
1246  const std::vector<T1,A1> & sendvec,
1247  const unsigned int source_processor_id,
1248  std::vector<T2,A2> & recv,
1249  const MessageTag & send_tag,
1250  const MessageTag & recv_tag) const
1251 {
1252  // Call the user-defined type version with automatic
1253  // type conversion based on template argument:
1254  this->send_receive (dest_processor_id, sendvec,
1255  StandardType<T1>(sendvec.empty() ? nullptr : sendvec.data()),
1256  source_processor_id, recv,
1257  StandardType<T2>(recv.empty() ? nullptr : recv.data()),
1258  send_tag, recv_tag);
1259 }
1260 
1261 
1262 
1263 
1264 template <typename T1, typename T2, typename A1, typename A2, typename A3, typename A4>
1265 inline void Communicator::send_receive(const unsigned int dest_processor_id,
1266  const std::vector<std::vector<T1,A1>,A2> & sendvec,
1267  const unsigned int source_processor_id,
1268  std::vector<std::vector<T2,A3>,A4> & recv,
1269  const MessageTag & /* send_tag */,
1270  const MessageTag & /* recv_tag */) const
1271 {
1272  // FIXME - why aren't we honoring send_tag and recv_tag here?
1273  send_receive_vec_of_vec
1274  (dest_processor_id, sendvec, source_processor_id, recv,
1275  no_tag, any_tag, *this);
1276 }
1277 
1278 
1279 
1280 // This is both a declaration and definition for a new overloaded
1281 // function template, so we have to re-specify the default arguments
1282 template <typename T, typename A1, typename A2>
1283 inline void Communicator::send_receive(const unsigned int dest_processor_id,
1284  const std::vector<std::vector<T,A1>,A2> & sendvec,
1285  const unsigned int source_processor_id,
1286  std::vector<std::vector<T,A1>,A2> & recv,
1287  const MessageTag & /* send_tag */,
1288  const MessageTag & /* recv_tag */) const
1289 {
1290  // FIXME - why aren't we honoring send_tag and recv_tag here?
1291  send_receive_vec_of_vec
1292  (dest_processor_id, sendvec, source_processor_id, recv,
1293  no_tag, any_tag, *this);
1294 }
1295 
1296 
1297 
1298 
1299 template <typename Context1, typename RangeIter, typename Context2,
1300  typename OutputIter, typename T>
1301 inline void
1302 Communicator::send_receive_packed_range (const unsigned int dest_processor_id,
1303  const Context1 * context1,
1304  RangeIter send_begin,
1305  const RangeIter send_end,
1306  const unsigned int source_processor_id,
1307  Context2 * context2,
1308  OutputIter out_iter,
1309  const T * output_type,
1310  const MessageTag & send_tag,
1311  const MessageTag & recv_tag) const
1312 {
1313  LOG_SCOPE("send_receive()", "Parallel");
1314 
1315  Parallel::Request req;
1316 
1317  this->send_packed_range (dest_processor_id, context1, send_begin, send_end,
1318  req, send_tag);
1319 
1320  this->receive_packed_range (source_processor_id, context2, out_iter,
1321  output_type, recv_tag);
1322 
1323  req.wait();
1324 }
1325 
1326 
1327 
1328 template <typename Context, typename Iter>
1329 inline void Communicator::nonblocking_send_packed_range (const unsigned int dest_processor_id,
1330  const Context * context,
1331  Iter range_begin,
1332  const Iter range_end,
1333  Request & req,
1334  std::shared_ptr<std::vector<typename Parallel::Packing<typename std::iterator_traits<Iter>::value_type>::buffer_type>> & buffer,
1335  const MessageTag & tag) const
1336 {
1337  libmesh_experimental();
1338 
1339  // Allocate a buffer on the heap so we don't have to free it until
1340  // after the Request::wait()
1341  typedef typename std::iterator_traits<Iter>::value_type T;
1342  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
1343 
1344  if (range_begin != range_end)
1345  {
1346  if (buffer == nullptr)
1347  buffer = std::make_shared<std::vector<buffer_t>>();
1348  else
1349  buffer->clear();
1350 
1351  range_begin =
1352  Parallel::pack_range(context,
1353  range_begin,
1354  range_end,
1355  *buffer,
1356  // MPI-2 can only use integers for size
1358 
1359  if (range_begin != range_end)
1360  libmesh_error_msg("Non-blocking packed range sends cannot exceed " << std::numeric_limits<int>::max() << "in size");
1361 
1362  // Make it dereference the shared pointer (possibly freeing the buffer)
1363  req.add_post_wait_work
1364  (new Parallel::PostWaitDereferenceSharedPtr<std::vector<buffer_t>>(buffer));
1365 
1366  // Non-blocking send of the buffer
1367  this->send(dest_processor_id, *buffer, req, tag);
1368  }
1369 }
1370 
1371 
1372 
1373 template <typename T, typename A>
1374 inline void Communicator::allgather(const std::basic_string<T> & sendval,
1375  std::vector<std::basic_string<T>,A> & recv,
1376  const bool identical_buffer_sizes) const
1377 {
1378  LOG_SCOPE ("allgather()","Parallel");
1379 
1380  libmesh_assert(this->size());
1381  recv.assign(this->size(), "");
1382 
1383  // serial case
1384  if (this->size() < 2)
1385  {
1386  recv.resize(1);
1387  recv[0] = sendval;
1388  return;
1389  }
1390 
1391  std::vector<int>
1392  sendlengths (this->size(), 0),
1393  displacements(this->size(), 0);
1394 
1395  const int mysize = static_cast<int>(sendval.size());
1396 
1397  if (identical_buffer_sizes)
1398  sendlengths.assign(this->size(), mysize);
1399  else
1400  // first comm step to determine buffer sizes from all processors
1401  this->allgather(mysize, sendlengths);
1402 
1403  // Find the total size of the final array and
1404  // set up the displacement offsets for each processor
1405  unsigned int globalsize = 0;
1406  for (unsigned int i=0; i != this->size(); ++i)
1407  {
1408  displacements[i] = globalsize;
1409  globalsize += sendlengths[i];
1410  }
1411 
1412  // Check for quick return
1413  if (globalsize == 0)
1414  return;
1415 
1416  // monolithic receive buffer
1417  std::string r(globalsize, 0);
1418 
1419  // and get the data from the remote processors.
1420  libmesh_call_mpi
1421  (MPI_Allgatherv (const_cast<T*>(mysize ? sendval.data() : nullptr),
1422  mysize, StandardType<T>(),
1423  &r[0], sendlengths.data(), displacements.data(),
1424  StandardType<T>(), this->get()));
1425 
1426  // slice receive buffer up
1427  for (unsigned int i=0; i != this->size(); ++i)
1428  recv[i] = r.substr(displacements[i], sendlengths[i]);
1429 }
1430 
1431 
1432 
1433 template <>
1434 inline void Communicator::broadcast (bool & data, const unsigned int root_id) const
1435 {
1436  if (this->size() == 1)
1437  {
1438  libmesh_assert (!this->rank());
1439  libmesh_assert (!root_id);
1440  return;
1441  }
1442 
1443  libmesh_assert_less (root_id, this->size());
1444 
1445  LOG_SCOPE("broadcast()", "Parallel");
1446 
1447  // We don't want to depend on MPI-2 or C++ MPI, so we don't have
1448  // MPI::BOOL available
1449  char char_data = data;
1450 
1451  // Spread data to remote processors.
1452  libmesh_call_mpi
1453  (MPI_Bcast (&char_data, 1, StandardType<char>(&char_data),
1454  root_id, this->get()));
1455 
1456  data = char_data;
1457 }
1458 
1459 
1460 template <typename T>
1461 inline void Communicator::broadcast (std::basic_string<T> & data,
1462  const unsigned int root_id) const
1463 {
1464  if (this->size() == 1)
1465  {
1466  libmesh_assert (!this->rank());
1467  libmesh_assert (!root_id);
1468  return;
1469  }
1470 
1471  libmesh_assert_less (root_id, this->size());
1472 
1473  LOG_SCOPE("broadcast()", "Parallel");
1474 
1475  std::size_t data_size = data.size();
1476  this->broadcast(data_size, root_id);
1477 
1478  std::vector<T> data_c(data_size);
1479 #ifndef NDEBUG
1480  std::string orig(data);
1481 #endif
1482 
1483  if (this->rank() == root_id)
1484  for (std::size_t i=0; i<data.size(); i++)
1485  data_c[i] = data[i];
1486 
1487  this->broadcast (data_c, root_id);
1488 
1489  data.assign(data_c.begin(), data_c.end());
1490 
1491 #ifndef NDEBUG
1492  if (this->rank() == root_id)
1493  libmesh_assert_equal_to (data, orig);
1494 #endif
1495 }
1496 
1497 
1498 
1499 template <typename T, typename A>
1500 inline void Communicator::broadcast (std::vector<T,A> & data,
1501  const unsigned int root_id) const
1502 {
1503  if (this->size() == 1)
1504  {
1505  libmesh_assert (!this->rank());
1506  libmesh_assert (!root_id);
1507  return;
1508  }
1509 
1510  libmesh_assert_less (root_id, this->size());
1511 
1512  LOG_SCOPE("broadcast()", "Parallel");
1513 
1514  // and get the data from the remote processors.
1515  // Pass nullptr if our vector is empty.
1516  T * data_ptr = data.empty() ? nullptr : data.data();
1517 
1518  libmesh_call_mpi
1519  (MPI_Bcast (data_ptr, cast_int<int>(data.size()),
1520  StandardType<T>(data_ptr), root_id, this->get()));
1521 }
1522 
1523 
1524 template <typename T, typename A>
1525 inline void Communicator::broadcast (std::vector<std::basic_string<T>,A> & data,
1526  const unsigned int root_id) const
1527 {
1528  if (this->size() == 1)
1529  {
1530  libmesh_assert (!this->rank());
1531  libmesh_assert (!root_id);
1532  return;
1533  }
1534 
1535  libmesh_assert_less (root_id, this->size());
1536 
1537  LOG_SCOPE("broadcast()", "Parallel");
1538 
1539  std::size_t bufsize=0;
1540  if (root_id == this->rank())
1541  {
1542  for (std::size_t i=0; i<data.size(); ++i)
1543  bufsize += data[i].size() + 1; // Add one for the string length word
1544  }
1545  this->broadcast(bufsize, root_id);
1546 
1547  // Here we use unsigned int to store up to 32-bit characters
1548  std::vector<unsigned int> temp; temp.reserve(bufsize);
1549  // Pack the strings
1550  if (root_id == this->rank())
1551  {
1552  for (std::size_t i=0; i<data.size(); ++i)
1553  {
1554  temp.push_back(cast_int<unsigned int>(data[i].size()));
1555  for (std::size_t j=0; j != data[i].size(); ++j)
1560  temp.push_back(data[i][j]);
1561  }
1562  }
1563  else
1564  temp.resize(bufsize);
1565 
1566  // broad cast the packed strings
1567  this->broadcast(temp, root_id);
1568 
1569  // Unpack the strings
1570  if (root_id != this->rank())
1571  {
1572  data.clear();
1573  typename std::vector<unsigned int>::const_iterator iter = temp.begin();
1574  while (iter != temp.end())
1575  {
1576  std::size_t curr_len = *iter++;
1577  data.push_back(std::string(iter, iter+curr_len));
1578  iter += curr_len;
1579  }
1580  }
1581 }
1582 
1583 
1584 
1585 
1586 template <typename T, typename C, typename A>
1587 inline void Communicator::broadcast (std::set<T,C,A> & data,
1588  const unsigned int root_id) const
1589 {
1590  if (this->size() == 1)
1591  {
1592  libmesh_assert (!this->rank());
1593  libmesh_assert (!root_id);
1594  return;
1595  }
1596 
1597  libmesh_assert_less (root_id, this->size());
1598 
1599  LOG_SCOPE("broadcast()", "Parallel");
1600 
1601  std::vector<T> vecdata;
1602  if (this->rank() == root_id)
1603  vecdata.assign(data.begin(), data.end());
1604 
1605  std::size_t vecsize = vecdata.size();
1606  this->broadcast(vecsize, root_id);
1607  if (this->rank() != root_id)
1608  vecdata.resize(vecsize);
1609 
1610  this->broadcast(vecdata, root_id);
1611  if (this->rank() != root_id)
1612  {
1613  data.clear();
1614  data.insert(vecdata.begin(), vecdata.end());
1615  }
1616 }
1617 
1618 
1619 
1620 template <typename T1, typename T2, typename C, typename A>
1621 inline void Communicator::broadcast(std::map<T1,T2,C,A> & data,
1622  const unsigned int root_id) const
1623 {
1624  if (this->size() == 1)
1625  {
1626  libmesh_assert (!this->rank());
1627  libmesh_assert (!root_id);
1628  return;
1629  }
1630 
1631  libmesh_assert_less (root_id, this->size());
1632 
1633  LOG_SCOPE("broadcast()", "Parallel");
1634 
1635  std::size_t data_size=data.size();
1636  this->broadcast(data_size, root_id);
1637 
1638  std::vector<T1> pair_first; pair_first.reserve(data_size);
1639  std::vector<T2> pair_second; pair_first.reserve(data_size);
1640 
1641  if (root_id == this->rank())
1642  {
1643  for (const auto & pr : data)
1644  {
1645  pair_first.push_back(pr.first);
1646  pair_second.push_back(pr.second);
1647  }
1648  }
1649  else
1650  {
1651  pair_first.resize(data_size);
1652  pair_second.resize(data_size);
1653  }
1654 
1655  this->broadcast(pair_first, root_id);
1656  this->broadcast(pair_second, root_id);
1657 
1658  libmesh_assert(pair_first.size() == pair_first.size());
1659 
1660  if (this->rank() != root_id)
1661  {
1662  data.clear();
1663  for (std::size_t i=0; i<pair_first.size(); ++i)
1664  data[pair_first[i]] = pair_second[i];
1665  }
1666 }
1667 
1668 
1669 
1670 template <typename Context, typename OutputContext,
1671  typename Iter, typename OutputIter>
1672 inline void Communicator::broadcast_packed_range(const Context * context1,
1673  Iter range_begin,
1674  const Iter range_end,
1675  OutputContext * context2,
1676  OutputIter out_iter,
1677  const unsigned int root_id) const
1678 {
1679  typedef typename std::iterator_traits<Iter>::value_type T;
1680  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
1681 
1682  do
1683  {
1684  // We will serialize variable size objects from *range_begin to
1685  // *range_end as a sequence of ints in this buffer
1686  std::vector<buffer_t> buffer;
1687 
1688  if (this->rank() == root_id)
1689  range_begin = Parallel::pack_range
1690  (context1, range_begin, range_end, buffer);
1691 
1692  // this->broadcast(vector) requires the receiving vectors to
1693  // already be the appropriate size
1694  std::size_t buffer_size = buffer.size();
1695  this->broadcast (buffer_size, root_id);
1696 
1697  // We continue until there's nothing left to broadcast
1698  if (!buffer_size)
1699  break;
1700 
1701  buffer.resize(buffer_size);
1702 
1703  // Broadcast the packed data
1704  this->broadcast (buffer, root_id);
1705 
1706  if (this->rank() != root_id)
1708  (buffer, context2, out_iter, (T*)nullptr);
1709  } while (true); // break above when we reach buffer_size==0
1710 }
1711 
1712 
1713 
1714 template <typename Context, typename OutputIter, typename T>
1715 inline void Communicator::nonblocking_receive_packed_range (const unsigned int src_processor_id,
1716  Context * context,
1717  OutputIter out,
1718  const T * /*output_type*/,
1719  Request & req,
1720  Status & stat,
1721  std::shared_ptr<std::vector<typename Parallel::Packing<T>::buffer_type>> & buffer,
1722  const MessageTag & tag) const
1723 {
1724  libmesh_experimental();
1725 
1726  // If they didn't pass in a buffer - let's make one
1727  if (buffer == nullptr)
1728  buffer = std::make_shared<std::vector<typename Parallel::Packing<T>::buffer_type>>();
1729  else
1730  buffer->clear();
1731 
1732  // Receive serialized variable size objects as a sequence of
1733  // buffer_t.
1734  // Allocate a buffer on the heap so we don't have to free it until
1735  // after the Request::wait()
1736  buffer->resize(stat.size());
1737  this->receive(src_processor_id, *buffer, req, tag);
1738 
1739  // Make the Request::wait() handle unpacking the buffer
1740  req.add_post_wait_work
1741  (new Parallel::PostWaitUnpackBuffer<std::vector<typename Parallel::Packing<T>::buffer_type>, Context, OutputIter, T>(*buffer, context, out));
1742 
1743  // Make it dereference the shared pointer (possibly freeing the buffer)
1744  req.add_post_wait_work
1746 }
1747 
1748 
1749 #else // LIBMESH_HAVE_MPI
1750 
1754 inline status Communicator::probe (const unsigned int,
1755  const MessageTag &) const
1756 { libmesh_not_implemented(); status s; return s; }
1757 
1761 template <typename T>
1762 inline void Communicator::send (const unsigned int,
1763  const T &,
1764  const MessageTag &) const
1765 { libmesh_not_implemented(); }
1766 
1767 template <typename T>
1768 inline void Communicator::send (const unsigned int,
1769  const T &,
1770  Request &,
1771  const MessageTag &) const
1772 { libmesh_not_implemented(); }
1773 
1774 template <typename T>
1775 inline void Communicator::send (const unsigned int,
1776  const T &,
1777  const DataType &,
1778  const MessageTag &) const
1779 { libmesh_not_implemented(); }
1780 
1781 template <typename T>
1782 inline void Communicator::send (const unsigned int,
1783  const T &,
1784  const DataType &,
1785  Request &,
1786  const MessageTag &) const
1787 { libmesh_not_implemented(); }
1788 
1789 template <typename Context, typename Iter>
1790 inline void Communicator::send_packed_range(const unsigned int,
1791  const Context *,
1792  Iter,
1793  const Iter,
1794  const MessageTag &) const
1795 { libmesh_not_implemented(); }
1796 
1797 template <typename Context, typename Iter>
1798 inline void Communicator::send_packed_range (const unsigned int,
1799  const Context *,
1800  Iter,
1801  const Iter,
1802  Request &,
1803  const MessageTag &) const
1804 { libmesh_not_implemented(); }
1805 
1809 template <typename T>
1810 inline Status Communicator::receive (const unsigned int,
1811  T &,
1812  const MessageTag &) const
1813 { libmesh_not_implemented(); return Status(); }
1814 
1815 template <typename T>
1816 inline void Communicator::receive(const unsigned int,
1817  T &,
1818  Request &,
1819  const MessageTag &) const
1820 { libmesh_not_implemented(); }
1821 
1822 template <typename T>
1823 inline Status Communicator::receive(const unsigned int,
1824  T &,
1825  const DataType &,
1826  const MessageTag &) const
1827 { libmesh_not_implemented(); return Status(); }
1828 
1829 template <typename T>
1830 inline void Communicator::receive(const unsigned int,
1831  T &,
1832  const DataType &,
1833  Request &,
1834  const MessageTag &) const
1835 { libmesh_not_implemented(); }
1836 
1837 template <typename Context, typename OutputIter, typename T>
1838 inline void
1839 Communicator::receive_packed_range(const unsigned int,
1840  Context *,
1841  OutputIter,
1842  const T *,
1843  const MessageTag &) const
1844 { libmesh_not_implemented(); }
1845 
1846 // template <typename Context, typename OutputIter>
1847 // inline void Communicator::receive_packed_range(const unsigned int, Context *, OutputIter, Request &, const MessageTag &) const
1848 // { libmesh_not_implemented(); }
1849 
1853 template <typename T1, typename T2>
1854 inline void Communicator::send_receive (const unsigned int send_tgt,
1855  const T1 & send_val,
1856  const unsigned int recv_source,
1857  T2 & recv_val,
1858  const MessageTag &,
1859  const MessageTag &) const
1860 {
1861  libmesh_assert_equal_to (send_tgt, 0);
1862  libmesh_assert_equal_to (recv_source, 0);
1863  recv_val = send_val;
1864 }
1865 
1872 template <typename Context1, typename RangeIter,
1873  typename Context2, typename OutputIter, typename T>
1874 inline void
1876  (const unsigned int libmesh_dbg_var(dest_processor_id),
1877  const Context1 * context1,
1878  RangeIter send_begin,
1879  const RangeIter send_end,
1880  const unsigned int libmesh_dbg_var(source_processor_id),
1881  Context2 * context2,
1882  OutputIter out_iter,
1883  const T * output_type,
1884  const MessageTag &,
1885  const MessageTag &) const
1886 {
1887  // This makes no sense on one processor unless we're deliberately
1888  // sending to ourself.
1889  libmesh_assert_equal_to(dest_processor_id, 0);
1890  libmesh_assert_equal_to(source_processor_id, 0);
1891 
1892  // On one processor, we just need to pack the range and then unpack
1893  // it again.
1894  typedef typename std::iterator_traits<RangeIter>::value_type T1;
1895  typedef typename Parallel::Packing<T1>::buffer_type buffer_t;
1896 
1897  while (send_begin != send_end)
1898  {
1899  libmesh_assert_greater (std::distance(send_begin, send_end), 0);
1900 
1901  // We will serialize variable size objects from *range_begin to
1902  // *range_end as a sequence of ints in this buffer
1903  std::vector<buffer_t> buffer;
1904 
1905  const RangeIter next_send_begin = Parallel::pack_range
1906  (context1, send_begin, send_end, buffer);
1907 
1908  libmesh_assert_greater (std::distance(send_begin, next_send_begin), 0);
1909 
1910  send_begin = next_send_begin;
1911 
1913  (buffer, context2, out_iter, output_type);
1914  }
1915 }
1916 
1917 #endif // LIBMESH_HAVE_MPI
1918 
1919 // Some of our methods are implemented indirectly via other
1920 // MPI-encapsulated methods and the implementation works with or
1921 // without MPI.
1922 //
1923 // Other methods have a "this->size() == 1" shortcut which still
1924 // applies in the without-MPI case, and the libmesh_call_mpi macro
1925 // becomes a no-op so wrapped MPI methods still compile without MPI
1926 
1927 template <typename T>
1928 inline bool Communicator::verify(const T & r) const
1929 {
1930  if (this->size() > 1 && Attributes<T>::has_min_max == true)
1931  {
1932  T tempmin = r, tempmax = r;
1933  this->min(tempmin);
1934  this->max(tempmax);
1935  bool verified = (r == tempmin) &&
1936  (r == tempmax);
1937  this->min(verified);
1938  return verified;
1939  }
1940 
1941  static_assert(Attributes<T>::has_min_max,
1942  "Tried to verify an unverifiable type");
1943 
1944  return true;
1945 }
1946 
1947 template <typename T>
1948 inline bool Communicator::semiverify(const T * r) const
1949 {
1950  if (this->size() > 1 && Attributes<T>::has_min_max == true)
1951  {
1952  T tempmin, tempmax;
1953  if (r)
1954  tempmin = tempmax = *r;
1955  else
1956  {
1957  Attributes<T>::set_highest(tempmin);
1958  Attributes<T>::set_lowest(tempmax);
1959  }
1960  this->min(tempmin);
1961  this->max(tempmax);
1962  bool invalid = r && ((*r != tempmin) ||
1963  (*r != tempmax));
1964  this->max(invalid);
1965  return !invalid;
1966  }
1967 
1968  static_assert(Attributes<T>::has_min_max,
1969  "Tried to semiverify an unverifiable type");
1970 
1971  return true;
1972 }
1973 
1974 
1975 
1976 inline bool Communicator::verify(const bool & r) const
1977 {
1978  const unsigned char rnew = r;
1979  return this->verify(rnew);
1980 }
1981 
1982 
1983 
1984 inline bool Communicator::semiverify(const bool * r) const
1985 {
1986  if (r)
1987  {
1988  const unsigned char rnew = *r;
1989  return this->semiverify(&rnew);
1990  }
1991 
1992  const unsigned char * rptr = nullptr;
1993  return this->semiverify(rptr);
1994 }
1995 
1996 
1997 
1998 template <typename T, typename A>
1999 inline bool Communicator::semiverify(const std::vector<T,A> * r) const
2000 {
2001  if (this->size() > 1 && Attributes<T>::has_min_max == true)
2002  {
2003  std::size_t rsize = r ? r->size() : 0;
2004  std::size_t * psize = r ? &rsize : nullptr;
2005 
2006  if (!this->semiverify(psize))
2007  return false;
2008 
2009  this->max(rsize);
2010 
2011  std::vector<T,A> tempmin, tempmax;
2012  if (r)
2013  {
2014  tempmin = tempmax = *r;
2015  }
2016  else
2017  {
2018  tempmin.resize(rsize);
2019  tempmax.resize(rsize);
2020  Attributes<std::vector<T,A>>::set_highest(tempmin);
2021  Attributes<std::vector<T,A>>::set_lowest(tempmax);
2022  }
2023  this->min(tempmin);
2024  this->max(tempmax);
2025  bool invalid = r && ((*r != tempmin) ||
2026  (*r != tempmax));
2027  this->max(invalid);
2028  return !invalid;
2029  }
2030 
2031  static_assert(Attributes<T>::has_min_max,
2032  "Tried to semiverify a vector of an unverifiable type");
2033 
2034  return true;
2035 }
2036 
2037 
2038 
2039 inline bool Communicator::verify(const std::string & r) const
2040 {
2041  if (this->size() > 1)
2042  {
2043  // Cannot use <char> since MPI_MIN is not
2044  // strictly defined for chars!
2045  std::vector<short int> temp; temp.reserve(r.size());
2046  for (std::size_t i=0; i != r.size(); ++i)
2047  temp.push_back(r[i]);
2048  return this->verify(temp);
2049  }
2050  return true;
2051 }
2052 
2053 
2054 
2055 inline bool Communicator::semiverify(const std::string * r) const
2056 {
2057  if (this->size() > 1)
2058  {
2059  std::size_t rsize = r ? r->size() : 0;
2060  std::size_t * psize = r ? &rsize : nullptr;
2061 
2062  if (!this->semiverify(psize))
2063  return false;
2064 
2065  this->max(rsize);
2066 
2067  // Cannot use <char> since MPI_MIN is not
2068  // strictly defined for chars!
2069  std::vector<short int> temp (rsize);
2070  if (r)
2071  {
2072  temp.reserve(rsize);
2073  for (std::size_t i=0; i != rsize; ++i)
2074  temp.push_back((*r)[i]);
2075  }
2076 
2077  std::vector<short int> * ptemp = r ? &temp: nullptr;
2078 
2079  return this->semiverify(ptemp);
2080  }
2081  return true;
2082 }
2083 
2084 
2085 
2086 template <typename T>
2087 inline void Communicator::min(T & libmesh_mpi_var(r)) const
2088 {
2089  if (this->size() > 1)
2090  {
2091  LOG_SCOPE("min(scalar)", "Parallel");
2092 
2093  libmesh_call_mpi
2094  (MPI_Allreduce (MPI_IN_PLACE, &r, 1,
2096  this->get()));
2097  }
2098 }
2099 
2100 
2101 
2102 inline void Communicator::min(bool & r) const
2103 {
2104  if (this->size() > 1)
2105  {
2106  LOG_SCOPE("min(bool)", "Parallel");
2107 
2108  unsigned int temp = r;
2109  libmesh_call_mpi
2110  (MPI_Allreduce (MPI_IN_PLACE, &temp, 1,
2113  this->get()));
2114  r = temp;
2115  }
2116 }
2117 
2118 
2119 template <typename T, typename A>
2120 inline void Communicator::min(std::vector<T,A> & r) const
2121 {
2122  if (this->size() > 1 && !r.empty())
2123  {
2124  LOG_SCOPE("min(vector)", "Parallel");
2125 
2126  libmesh_assert(this->verify(r.size()));
2127 
2128  libmesh_call_mpi
2129  (MPI_Allreduce (MPI_IN_PLACE, r.data(),
2130  cast_int<int>(r.size()),
2131  StandardType<T>(r.data()),
2133  this->get()));
2134  }
2135 }
2136 
2137 
2138 template <typename A>
2139 inline void Communicator::min(std::vector<bool,A> & r) const
2140 {
2141  if (this->size() > 1 && !r.empty())
2142  {
2143  LOG_SCOPE("min(vector<bool>)", "Parallel");
2144 
2145  libmesh_assert(this->verify(r.size()));
2146 
2147  std::vector<unsigned int> ruint;
2148  pack_vector_bool(r, ruint);
2149  std::vector<unsigned int> temp(ruint.size());
2150  libmesh_call_mpi
2151  (MPI_Allreduce (ruint.data(), temp.data(),
2152  cast_int<int>(ruint.size()),
2153  StandardType<unsigned int>(), MPI_BAND,
2154  this->get()));
2155  unpack_vector_bool(temp, r);
2156  }
2157 }
2158 
2159 
2160 template <typename T>
2161 inline void Communicator::minloc(T & r,
2162  unsigned int & min_id) const
2163 {
2164  if (this->size() > 1)
2165  {
2166  LOG_SCOPE("minloc(scalar)", "Parallel");
2167 
2168  DataPlusInt<T> data_in;
2169  libmesh_ignore(data_in); // unused ifndef LIBMESH_HAVE_MPI
2170  data_in.val = r;
2171  data_in.rank = this->rank();
2172  libmesh_call_mpi
2173  (MPI_Allreduce (MPI_IN_PLACE, &data_in, 1, dataplusint_type<T>(),
2174  OpFunction<T>::min_location(), this->get()));
2175  r = data_in.val;
2176  min_id = data_in.rank;
2177  }
2178  else
2179  min_id = this->rank();
2180 }
2181 
2182 
2183 inline void Communicator::minloc(bool & r,
2184  unsigned int & min_id) const
2185 {
2186  if (this->size() > 1)
2187  {
2188  LOG_SCOPE("minloc(bool)", "Parallel");
2189 
2190  DataPlusInt<int> data_in;
2191  libmesh_ignore(data_in); // unused ifndef LIBMESH_HAVE_MPI
2192  data_in.val = r;
2193  data_in.rank = this->rank();
2194  DataPlusInt<int> data_out;
2195  libmesh_call_mpi
2196  (MPI_Allreduce (&data_in, &data_out, 1,
2198  OpFunction<int>::min_location(), this->get()));
2199  r = data_out.val;
2200  min_id = data_out.rank;
2201  }
2202  else
2203  min_id = this->rank();
2204 }
2205 
2206 
2207 template <typename T, typename A1, typename A2>
2208 inline void Communicator::minloc(std::vector<T,A1> & r,
2209  std::vector<unsigned int,A2> & min_id) const
2210 {
2211  if (this->size() > 1 && !r.empty())
2212  {
2213  LOG_SCOPE("minloc(vector)", "Parallel");
2214 
2215  libmesh_assert(this->verify(r.size()));
2216 
2217  std::vector<DataPlusInt<T>> data_in(r.size());
2218  for (std::size_t i=0; i != r.size(); ++i)
2219  {
2220  data_in[i].val = r[i];
2221  data_in[i].rank = this->rank();
2222  }
2223  std::vector<DataPlusInt<T>> data_out(r.size());
2224  libmesh_call_mpi
2225  (MPI_Allreduce (data_in.data(), data_out.data(),
2226  cast_int<int>(r.size()),
2227  dataplusint_type<T>(),
2228  OpFunction<T>::min_location(), this->get()));
2229  for (std::size_t i=0; i != r.size(); ++i)
2230  {
2231  r[i] = data_out[i].val;
2232  min_id[i] = data_out[i].rank;
2233  }
2234  }
2235  else if (!r.empty())
2236  {
2237  for (std::size_t i=0; i != r.size(); ++i)
2238  min_id[i] = this->rank();
2239  }
2240 }
2241 
2242 
2243 template <typename A1, typename A2>
2244 inline void Communicator::minloc(std::vector<bool,A1> & r,
2245  std::vector<unsigned int,A2> & min_id) const
2246 {
2247  if (this->size() > 1 && !r.empty())
2248  {
2249  LOG_SCOPE("minloc(vector<bool>)", "Parallel");
2250 
2251  libmesh_assert(this->verify(r.size()));
2252 
2253  std::vector<DataPlusInt<int>> data_in(r.size());
2254  for (std::size_t i=0; i != r.size(); ++i)
2255  {
2256  data_in[i].val = r[i];
2257  data_in[i].rank = this->rank();
2258  }
2259  std::vector<DataPlusInt<int>> data_out(r.size());
2260  libmesh_call_mpi
2261  (MPI_Allreduce (data_in.data(), data_out.data(),
2262  cast_int<int>(r.size()),
2264  OpFunction<int>::min_location(), this->get()));
2265  for (std::size_t i=0; i != r.size(); ++i)
2266  {
2267  r[i] = data_out[i].val;
2268  min_id[i] = data_out[i].rank;
2269  }
2270  }
2271  else if (!r.empty())
2272  {
2273  for (std::size_t i=0; i != r.size(); ++i)
2274  min_id[i] = this->rank();
2275  }
2276 }
2277 
2278 
2279 template <typename T>
2280 inline void Communicator::max(T & libmesh_mpi_var(r)) const
2281 {
2282  if (this->size() > 1)
2283  {
2284  LOG_SCOPE("max(scalar)", "Parallel");
2285 
2286  libmesh_call_mpi
2287  (MPI_Allreduce (MPI_IN_PLACE, &r, 1,
2288  StandardType<T>(&r),
2290  this->get()));
2291  }
2292 }
2293 
2294 
2295 inline void Communicator::max(bool & r) const
2296 {
2297  if (this->size() > 1)
2298  {
2299  LOG_SCOPE("max(bool)", "Parallel");
2300 
2301  unsigned int temp = r;
2302  libmesh_call_mpi
2303  (MPI_Allreduce (MPI_IN_PLACE, &temp, 1,
2306  this->get()));
2307  r = temp;
2308  }
2309 }
2310 
2311 
2312 template <typename T, typename A>
2313 inline void Communicator::max(std::vector<T,A> & r) const
2314 {
2315  if (this->size() > 1 && !r.empty())
2316  {
2317  LOG_SCOPE("max(vector)", "Parallel");
2318 
2319  libmesh_assert(this->verify(r.size()));
2320 
2321  libmesh_call_mpi
2322  (MPI_Allreduce (MPI_IN_PLACE, r.data(),
2323  cast_int<int>(r.size()),
2324  StandardType<T>(r.data()),
2326  this->get()));
2327  }
2328 }
2329 
2330 
2331 template <typename A>
2332 inline void Communicator::max(std::vector<bool,A> & r) const
2333 {
2334  if (this->size() > 1 && !r.empty())
2335  {
2336  LOG_SCOPE("max(vector<bool>)", "Parallel");
2337 
2338  libmesh_assert(this->verify(r.size()));
2339 
2340  std::vector<unsigned int> ruint;
2341  pack_vector_bool(r, ruint);
2342  std::vector<unsigned int> temp(ruint.size());
2343  libmesh_call_mpi
2344  (MPI_Allreduce (ruint.data(), temp.data(),
2345  cast_int<int>(ruint.size()),
2346  StandardType<unsigned int>(), MPI_BOR,
2347  this->get()));
2348  unpack_vector_bool(temp, r);
2349  }
2350 }
2351 
2352 
2353 template <typename T>
2354 inline void Communicator::maxloc(T & r,
2355  unsigned int & max_id) const
2356 {
2357  if (this->size() > 1)
2358  {
2359  LOG_SCOPE("maxloc(scalar)", "Parallel");
2360 
2361  DataPlusInt<T> data_in;
2362  libmesh_ignore(data_in); // unused ifndef LIBMESH_HAVE_MPI
2363  data_in.val = r;
2364  data_in.rank = this->rank();
2365  libmesh_call_mpi
2366  (MPI_Allreduce (MPI_IN_PLACE, &data_in, 1,
2367  dataplusint_type<T>(),
2369  this->get()));
2370  r = data_in.val;
2371  max_id = data_in.rank;
2372  }
2373  else
2374  max_id = this->rank();
2375 }
2376 
2377 
2378 inline void Communicator::maxloc(bool & r,
2379  unsigned int & max_id) const
2380 {
2381  if (this->size() > 1)
2382  {
2383  LOG_SCOPE("maxloc(bool)", "Parallel");
2384 
2385  DataPlusInt<int> data_in;
2386  libmesh_ignore(data_in); // unused ifndef LIBMESH_HAVE_MPI
2387  data_in.val = r;
2388  data_in.rank = this->rank();
2389  DataPlusInt<int> data_out;
2390  libmesh_call_mpi
2391  (MPI_Allreduce (&data_in, &data_out, 1,
2394  this->get()));
2395  r = data_out.val;
2396  max_id = data_out.rank;
2397  }
2398  else
2399  max_id = this->rank();
2400 }
2401 
2402 
2403 template <typename T, typename A1, typename A2>
2404 inline void Communicator::maxloc(std::vector<T,A1> & r,
2405  std::vector<unsigned int,A2> & max_id) const
2406 {
2407  if (this->size() > 1 && !r.empty())
2408  {
2409  LOG_SCOPE("maxloc(vector)", "Parallel");
2410 
2411  libmesh_assert(this->verify(r.size()));
2412 
2413  std::vector<DataPlusInt<T>> data_in(r.size());
2414  for (std::size_t i=0; i != r.size(); ++i)
2415  {
2416  data_in[i].val = r[i];
2417  data_in[i].rank = this->rank();
2418  }
2419  std::vector<DataPlusInt<T>> data_out(r.size());
2420  libmesh_call_mpi
2421  (MPI_Allreduce (data_in.data(), data_out.data(),
2422  cast_int<int>(r.size()),
2423  dataplusint_type<T>(),
2425  this->get()));
2426  for (std::size_t i=0; i != r.size(); ++i)
2427  {
2428  r[i] = data_out[i].val;
2429  max_id[i] = data_out[i].rank;
2430  }
2431  }
2432  else if (!r.empty())
2433  {
2434  for (std::size_t i=0; i != r.size(); ++i)
2435  max_id[i] = this->rank();
2436  }
2437 }
2438 
2439 
2440 template <typename A1, typename A2>
2441 inline void Communicator::maxloc(std::vector<bool,A1> & r,
2442  std::vector<unsigned int,A2> & max_id) const
2443 {
2444  if (this->size() > 1 && !r.empty())
2445  {
2446  LOG_SCOPE("maxloc(vector<bool>)", "Parallel");
2447 
2448  libmesh_assert(this->verify(r.size()));
2449 
2450  std::vector<DataPlusInt<int>> data_in(r.size());
2451  for (std::size_t i=0; i != r.size(); ++i)
2452  {
2453  data_in[i].val = r[i];
2454  data_in[i].rank = this->rank();
2455  }
2456  std::vector<DataPlusInt<int>> data_out(r.size());
2457  libmesh_call_mpi
2458  (MPI_Allreduce (data_in.data(), data_out.data(),
2459  cast_int<int>(r.size()),
2462  this->get()));
2463  for (std::size_t i=0; i != r.size(); ++i)
2464  {
2465  r[i] = data_out[i].val;
2466  max_id[i] = data_out[i].rank;
2467  }
2468  }
2469  else if (!r.empty())
2470  {
2471  for (std::size_t i=0; i != r.size(); ++i)
2472  max_id[i] = this->rank();
2473  }
2474 }
2475 
2476 
2477 template <typename T>
2478 inline void Communicator::sum(T & libmesh_mpi_var(r)) const
2479 {
2480  if (this->size() > 1)
2481  {
2482  LOG_SCOPE("sum()", "Parallel");
2483 
2484  libmesh_call_mpi
2485  (MPI_Allreduce (MPI_IN_PLACE, &r, 1,
2486  StandardType<T>(&r),
2488  this->get()));
2489  }
2490 }
2491 
2492 
2493 template <typename T, typename A>
2494 inline void Communicator::sum(std::vector<T,A> & r) const
2495 {
2496  if (this->size() > 1 && !r.empty())
2497  {
2498  LOG_SCOPE("sum()", "Parallel");
2499 
2500  libmesh_assert(this->verify(r.size()));
2501 
2502  libmesh_call_mpi
2503  (MPI_Allreduce (MPI_IN_PLACE, r.data(),
2504  cast_int<int>(r.size()),
2505  StandardType<T>(r.data()),
2507  this->get()));
2508  }
2509 }
2510 
2511 
2512 // We still do function overloading for complex sums - in a perfect
2513 // world we'd have a StandardSumOp to go along with StandardType...
2514 template <typename T>
2515 inline void Communicator::sum(std::complex<T> & libmesh_mpi_var(r)) const
2516 {
2517  if (this->size() > 1)
2518  {
2519  LOG_SCOPE("sum()", "Parallel");
2520 
2521  libmesh_call_mpi
2522  (MPI_Allreduce (MPI_IN_PLACE, &r, 2,
2523  StandardType<T>(),
2525  this->get()));
2526  }
2527 }
2528 
2529 
2530 template <typename T, typename A>
2531 inline void Communicator::sum(std::vector<std::complex<T>,A> & r) const
2532 {
2533  if (this->size() > 1 && !r.empty())
2534  {
2535  LOG_SCOPE("sum()", "Parallel");
2536 
2537  libmesh_assert(this->verify(r.size()));
2538 
2539  libmesh_call_mpi
2540  (MPI_Allreduce (MPI_IN_PLACE, r.data(),
2541  cast_int<int>(r.size() * 2),
2542  StandardType<T>(nullptr),
2543  OpFunction<T>::sum(), this->get()));
2544  }
2545 }
2546 
2547 
2548 
2549 template <typename T, typename C, typename A>
2550 inline void Communicator::set_union(std::set<T,C,A> & data,
2551  const unsigned int root_id) const
2552 {
2553  if (this->size() > 1)
2554  {
2555  std::vector<T> vecdata(data.begin(), data.end());
2556  this->gather(root_id, vecdata);
2557  if (this->rank() == root_id)
2558  data.insert(vecdata.begin(), vecdata.end());
2559  }
2560 }
2561 
2562 
2563 
2564 template <typename T, typename C, typename A>
2565 inline void Communicator::set_union(std::set<T,C,A> & data) const
2566 {
2567  if (this->size() > 1)
2568  {
2569  std::vector<T> vecdata(data.begin(), data.end());
2570  this->allgather(vecdata, false);
2571  data.insert(vecdata.begin(), vecdata.end());
2572  }
2573 }
2574 
2575 
2576 
2577 template <typename T1, typename T2, typename C, typename A>
2578 inline void Communicator::set_union(std::map<T1,T2,C,A> & data,
2579  const unsigned int root_id) const
2580 {
2581  if (this->size() > 1)
2582  {
2583  std::vector<std::pair<T1,T2>> vecdata(data.begin(), data.end());
2584  this->gather(root_id, vecdata);
2585  if (this->rank() == root_id)
2586  data.insert(vecdata.begin(), vecdata.end());
2587  }
2588 }
2589 
2590 
2591 
2592 template <typename T1, typename T2, typename C, typename A>
2593 inline void Communicator::set_union(std::map<T1,T2,C,A> & data) const
2594 {
2595  if (this->size() > 1)
2596  {
2597  std::vector<std::pair<T1,T2>> vecdata(data.begin(), data.end());
2598  this->allgather(vecdata, false);
2599  data.insert(vecdata.begin(), vecdata.end());
2600  }
2601 }
2602 
2603 
2604 
2605 template <typename T, typename A>
2606 inline void Communicator::gather(const unsigned int root_id,
2607  const T & sendval,
2608  std::vector<T,A> & recv) const
2609 {
2610  libmesh_assert_less (root_id, this->size());
2611 
2612  if (this->rank() == root_id)
2613  recv.resize(this->size());
2614 
2615  if (this->size() > 1)
2616  {
2617  LOG_SCOPE("gather()", "Parallel");
2618 
2619  StandardType<T> send_type(&sendval);
2620 
2621  libmesh_call_mpi
2622  (MPI_Gather(const_cast<T*>(&sendval), 1, send_type,
2623  recv.empty() ? nullptr : recv.data(), 1, send_type,
2624  root_id, this->get()));
2625  }
2626  else
2627  recv[0] = sendval;
2628 }
2629 
2630 
2631 
2632 template <typename T, typename A>
2633 inline void Communicator::gather(const unsigned int root_id,
2634  std::vector<T,A> & r) const
2635 {
2636  if (this->size() == 1)
2637  {
2638  libmesh_assert (!this->rank());
2639  libmesh_assert (!root_id);
2640  return;
2641  }
2642 
2643  libmesh_assert_less (root_id, this->size());
2644 
2645  std::vector<int>
2646  sendlengths (this->size(), 0),
2647  displacements(this->size(), 0);
2648 
2649  const int mysize = static_cast<int>(r.size());
2650  this->allgather(mysize, sendlengths);
2651 
2652  LOG_SCOPE("gather()", "Parallel");
2653 
2654  // Find the total size of the final array and
2655  // set up the displacement offsets for each processor.
2656  unsigned int globalsize = 0;
2657  for (unsigned int i=0; i != this->size(); ++i)
2658  {
2659  displacements[i] = globalsize;
2660  globalsize += sendlengths[i];
2661  }
2662 
2663  // Check for quick return
2664  if (globalsize == 0)
2665  return;
2666 
2667  // copy the input buffer
2668  std::vector<T,A> r_src(r);
2669 
2670  // now resize it to hold the global data
2671  // on the receiving processor
2672  if (root_id == this->rank())
2673  r.resize(globalsize);
2674 
2675  // and get the data from the remote processors
2676  libmesh_call_mpi
2677  (MPI_Gatherv (r_src.empty() ? nullptr : r_src.data(), mysize,
2678  StandardType<T>(), r.empty() ? nullptr : r.data(),
2679  sendlengths.data(), displacements.data(),
2680  StandardType<T>(), root_id, this->get()));
2681 }
2682 
2683 
2684 
2685 template <typename T, typename A>
2686 inline void Communicator::gather(const unsigned int root_id,
2687  const std::basic_string<T> & sendval,
2688  std::vector<std::basic_string<T>,A> & recv,
2689  const bool identical_buffer_sizes) const
2690 {
2691  libmesh_assert_less (root_id, this->size());
2692 
2693  if (this->rank() == root_id)
2694  recv.resize(this->size());
2695 
2696  if (this->size() > 1)
2697  {
2698  LOG_SCOPE ("gather()","Parallel");
2699 
2700  std::vector<int>
2701  sendlengths (this->size(), 0),
2702  displacements(this->size(), 0);
2703 
2704  const int mysize = static_cast<int>(sendval.size());
2705 
2706  if (identical_buffer_sizes)
2707  sendlengths.assign(this->size(), mysize);
2708  else
2709  // first comm step to determine buffer sizes from all processors
2710  this->gather(root_id, mysize, sendlengths);
2711 
2712  // Find the total size of the final array and
2713  // set up the displacement offsets for each processor
2714  unsigned int globalsize = 0;
2715  for (unsigned int i=0; i < this->size(); ++i)
2716  {
2717  displacements[i] = globalsize;
2718  globalsize += sendlengths[i];
2719  }
2720 
2721  // monolithic receive buffer
2722  std::string r;
2723  if (this->rank() == root_id)
2724  r.resize(globalsize, 0);
2725 
2726  // and get the data from the remote processors.
2727  libmesh_call_mpi
2728  (MPI_Gatherv (const_cast<T*>(sendval.data()),
2729  mysize, StandardType<T>(),
2730  this->rank() == root_id ? &r[0] : nullptr,
2731  sendlengths.data(), displacements.data(),
2732  StandardType<T>(), root_id, this->get()));
2733 
2734  // slice receive buffer up
2735  if (this->rank() == root_id)
2736  for (unsigned int i=0; i != this->size(); ++i)
2737  recv[i] = r.substr(displacements[i], sendlengths[i]);
2738  }
2739  else
2740  recv[0] = sendval;
2741 }
2742 
2743 
2744 
2745 template <typename T, typename A>
2746 inline void Communicator::allgather(const T & sendval,
2747  std::vector<T,A> & recv) const
2748 {
2749  LOG_SCOPE ("allgather()","Parallel");
2750 
2751  libmesh_assert(this->size());
2752  recv.resize(this->size());
2753 
2754  unsigned int comm_size = this->size();
2755  if (comm_size > 1)
2756  {
2757  StandardType<T> send_type(&sendval);
2758 
2759  libmesh_call_mpi
2760  (MPI_Allgather (const_cast<T*>(&sendval), 1, send_type, recv.data(), 1,
2761  send_type, this->get()));
2762  }
2763  else if (comm_size > 0)
2764  recv[0] = sendval;
2765 }
2766 
2767 
2768 
2769 template <typename T, typename A>
2770 inline void Communicator::allgather(std::vector<T,A> & r,
2771  const bool identical_buffer_sizes) const
2772 {
2773  if (this->size() < 2)
2774  return;
2775 
2776  LOG_SCOPE("allgather()", "Parallel");
2777 
2778  if (identical_buffer_sizes)
2779  {
2780  if (r.empty())
2781  return;
2782 
2783  libmesh_assert(this->verify(r.size()));
2784 
2785  std::vector<T,A> r_src(r.size()*this->size());
2786  r_src.swap(r);
2787  StandardType<T> send_type(r_src.data());
2788 
2789  libmesh_call_mpi
2790  (MPI_Allgather (r_src.data(), cast_int<int>(r_src.size()),
2791  send_type, r.data(), cast_int<int>(r_src.size()),
2792  send_type, this->get()));
2793  // libmesh_assert(this->verify(r));
2794  return;
2795  }
2796 
2797  std::vector<int>
2798  sendlengths (this->size(), 0),
2799  displacements(this->size(), 0);
2800 
2801  const int mysize = static_cast<int>(r.size());
2802  this->allgather(mysize, sendlengths);
2803 
2804  // Find the total size of the final array and
2805  // set up the displacement offsets for each processor.
2806  unsigned int globalsize = 0;
2807  for (unsigned int i=0; i != this->size(); ++i)
2808  {
2809  displacements[i] = globalsize;
2810  globalsize += sendlengths[i];
2811  }
2812 
2813  // Check for quick return
2814  if (globalsize == 0)
2815  return;
2816 
2817  // copy the input buffer
2818  std::vector<T,A> r_src(globalsize);
2819  r_src.swap(r);
2820 
2821  StandardType<T> send_type(r.data());
2822 
2823  // and get the data from the remote processors.
2824  // Pass nullptr if our vector is empty.
2825  libmesh_call_mpi
2826  (MPI_Allgatherv (r_src.empty() ? nullptr : r_src.data(), mysize,
2827  send_type, r.data(), sendlengths.data(),
2828  displacements.data(), send_type, this->get()));
2829 }
2830 
2831 
2832 
2833 template <typename T, typename A>
2834 inline void Communicator::allgather(std::vector<std::basic_string<T>,A> & r,
2835  const bool identical_buffer_sizes) const
2836 {
2837  if (this->size() < 2)
2838  return;
2839 
2840  LOG_SCOPE("allgather()", "Parallel");
2841 
2842  if (identical_buffer_sizes)
2843  {
2844  libmesh_assert(this->verify(r.size()));
2845 
2846  // identical_buffer_sizes doesn't buy us much since we have to
2847  // communicate the lengths of strings within each buffer anyway
2848  if (r.empty())
2849  return;
2850  }
2851 
2852  // Concatenate the input buffer into a send buffer, and keep track
2853  // of input string lengths
2854  std::vector<int> mystrlengths (r.size());
2855  std::vector<T> concat_src;
2856 
2857  int myconcatsize = 0;
2858  for (unsigned int i=0; i != r.size(); ++i)
2859  {
2860  int stringlen = cast_int<int>(r[i].size());
2861  mystrlengths[i] = stringlen;
2862  myconcatsize += stringlen;
2863  }
2864  concat_src.reserve(myconcatsize);
2865  for (unsigned int i=0; i != r.size(); ++i)
2866  concat_src.insert
2867  (concat_src.end(), r[i].begin(), r[i].end());
2868 
2869  // Get the string lengths from all other processors
2870  std::vector<int> strlengths = mystrlengths;
2871  this->allgather(strlengths, identical_buffer_sizes);
2872 
2873  // We now know how many strings we'll be receiving
2874  r.resize(strlengths.size());
2875 
2876  // Get the concatenated data sizes from all other processors
2877  std::vector<int> concat_sizes;
2878  this->allgather(myconcatsize, concat_sizes);
2879 
2880  // Find the total size of the final concatenated array and
2881  // set up the displacement offsets for each processor.
2882  std::vector<int> displacements(this->size(), 0);
2883  unsigned int globalsize = 0;
2884  for (unsigned int i=0; i != this->size(); ++i)
2885  {
2886  displacements[i] = globalsize;
2887  globalsize += concat_sizes[i];
2888  }
2889 
2890  // Check for quick return
2891  if (globalsize == 0)
2892  return;
2893 
2894  // Get the concatenated data from the remote processors.
2895  // Pass nullptr if our vector is empty.
2896  std::vector<T> concat(globalsize);
2897 
2898  // We may have concat_src.empty(), but we know concat has at least
2899  // one element we can use as an example for StandardType
2900  StandardType<T> send_type(concat.data());
2901 
2902  libmesh_call_mpi
2903  (MPI_Allgatherv (concat_src.empty() ?
2904  nullptr : concat_src.data(), myconcatsize,
2905  send_type, concat.data(), concat_sizes.data(),
2906  displacements.data(), send_type, this->get()));
2907 
2908  // Finally, split concatenated data into strings
2909  const T * begin = concat.data();
2910  for (unsigned int i=0; i != r.size(); ++i)
2911  {
2912  const T * end = begin + strlengths[i];
2913  r[i].assign(begin, end);
2914  begin = end;
2915  }
2916 }
2917 
2918 
2919 
2920 template <typename T, typename A>
2921 void Communicator::scatter(const std::vector<T,A> & data,
2922  T & recv,
2923  const unsigned int root_id) const
2924 {
2925  libmesh_ignore(root_id); // Only needed for MPI and/or dbg/devel
2926  libmesh_assert_less (root_id, this->size());
2927 
2928  // Do not allow the root_id to scatter a nullptr vector.
2929  // That would leave recv in an indeterminate state.
2930  libmesh_assert (this->rank() != root_id || this->size() == data.size());
2931 
2932  if (this->size() == 1)
2933  {
2934  libmesh_assert (!this->rank());
2935  libmesh_assert (!root_id);
2936  recv = data[0];
2937  return;
2938  }
2939 
2940  LOG_SCOPE("scatter()", "Parallel");
2941 
2942  T * data_ptr = const_cast<T*>(data.empty() ? nullptr : data.data());
2943  libmesh_ignore(data_ptr); // unused ifndef LIBMESH_HAVE_MPI
2944 
2945  libmesh_call_mpi
2946  (MPI_Scatter (data_ptr, 1, StandardType<T>(data_ptr),
2947  &recv, 1, StandardType<T>(&recv), root_id, this->get()));
2948 }
2949 
2950 
2951 
2952 template <typename T, typename A>
2953 void Communicator::scatter(const std::vector<T,A> & data,
2954  std::vector<T,A> & recv,
2955  const unsigned int root_id) const
2956 {
2957  libmesh_assert_less (root_id, this->size());
2958 
2959  if (this->size() == 1)
2960  {
2961  libmesh_assert (!this->rank());
2962  libmesh_assert (!root_id);
2963  recv.assign(data.begin(), data.end());
2964  return;
2965  }
2966 
2967  LOG_SCOPE("scatter()", "Parallel");
2968 
2969  int recv_buffer_size = 0;
2970  if (this->rank() == root_id)
2971  {
2972  libmesh_assert(data.size() % this->size() == 0);
2973  recv_buffer_size = cast_int<int>(data.size() / this->size());
2974  }
2975 
2976  this->broadcast(recv_buffer_size);
2977  recv.resize(recv_buffer_size);
2978 
2979  T * data_ptr = const_cast<T*>(data.empty() ? nullptr : data.data());
2980  T * recv_ptr = recv.empty() ? nullptr : recv.data();
2981  libmesh_ignore(data_ptr, recv_ptr); // unused ifndef LIBMESH_HAVE_MPI
2982 
2983  libmesh_call_mpi
2984  (MPI_Scatter (data_ptr, recv_buffer_size, StandardType<T>(data_ptr),
2985  recv_ptr, recv_buffer_size, StandardType<T>(recv_ptr), root_id, this->get()));
2986 }
2987 
2988 
2989 
2990 template <typename T, typename A1, typename A2>
2991 void Communicator::scatter(const std::vector<T,A1> & data,
2992  const std::vector<int,A2> counts,
2993  std::vector<T,A1> & recv,
2994  const unsigned int root_id) const
2995 {
2996  libmesh_assert_less (root_id, this->size());
2997 
2998  if (this->size() == 1)
2999  {
3000  libmesh_assert (!this->rank());
3001  libmesh_assert (!root_id);
3002  libmesh_assert (counts.size() == this->size());
3003  recv.assign(data.begin(), data.begin() + counts[0]);
3004  return;
3005  }
3006 
3007  std::vector<int,A2> displacements(this->size(), 0);
3008  if (root_id == this->rank())
3009  {
3010  libmesh_assert(counts.size() == this->size());
3011 
3012  // Create a displacements vector from the incoming counts vector
3013  unsigned int globalsize = 0;
3014  for (unsigned int i=0; i < this->size(); ++i)
3015  {
3016  displacements[i] = globalsize;
3017  globalsize += counts[i];
3018  }
3019 
3020  libmesh_assert(data.size() == globalsize);
3021  }
3022 
3023  LOG_SCOPE("scatter()", "Parallel");
3024 
3025  // Scatter the buffer sizes to size remote buffers
3026  int recv_buffer_size = 0;
3027  this->scatter(counts, recv_buffer_size, root_id);
3028  recv.resize(recv_buffer_size);
3029 
3030  T * data_ptr = const_cast<T*>(data.empty() ? nullptr : data.data());
3031  int * count_ptr = const_cast<int*>(counts.empty() ? nullptr : counts.data());
3032  T * recv_ptr = recv.empty() ? nullptr : recv.data();
3033  libmesh_ignore(data_ptr, count_ptr, recv_ptr); // unused ifndef LIBMESH_HAVE_MPI
3034 
3035  // Scatter the non-uniform chunks
3036  libmesh_call_mpi
3037  (MPI_Scatterv (data_ptr, count_ptr, displacements.data(), StandardType<T>(data_ptr),
3038  recv_ptr, recv_buffer_size, StandardType<T>(recv_ptr), root_id, this->get()));
3039 }
3040 
3041 
3042 
3043 template <typename T, typename A1, typename A2>
3044 void Communicator::scatter(const std::vector<std::vector<T,A1>,A2> & data,
3045  std::vector<T,A1> & recv,
3046  const unsigned int root_id,
3047  const bool identical_buffer_sizes) const
3048 {
3049  libmesh_assert_less (root_id, this->size());
3050 
3051  if (this->size() == 1)
3052  {
3053  libmesh_assert (!this->rank());
3054  libmesh_assert (!root_id);
3055  libmesh_assert (data.size() == this->size());
3056  recv.assign(data[0].begin(), data[0].end());
3057  return;
3058  }
3059 
3060  std::vector<T,A1> stacked_data;
3061  std::vector<int> counts;
3062 
3063  if (root_id == this->rank())
3064  {
3065  libmesh_assert (data.size() == this->size());
3066 
3067  if (!identical_buffer_sizes)
3068  counts.resize(this->size());
3069 
3070  for (std::size_t i=0; i < data.size(); ++i)
3071  {
3072  if (!identical_buffer_sizes)
3073  counts[i] = cast_int<int>(data[i].size());
3074 #ifndef NDEBUG
3075  else
3076  // Check that buffer sizes are indeed equal
3077  libmesh_assert(!i || data[i-1].size() == data[i].size());
3078 #endif
3079  std::copy(data[i].begin(), data[i].end(), std::back_inserter(stacked_data));
3080  }
3081  }
3082 
3083  if (identical_buffer_sizes)
3084  this->scatter(stacked_data, recv, root_id);
3085  else
3086  this->scatter(stacked_data, counts, recv, root_id);
3087 }
3088 
3089 
3090 
3091 template <typename T, typename A>
3092 inline void Communicator::alltoall(std::vector<T,A> & buf) const
3093 {
3094  if (this->size() < 2 || buf.empty())
3095  return;
3096 
3097  LOG_SCOPE("alltoall()", "Parallel");
3098 
3099  // the per-processor size. this is the same for all
3100  // processors using MPI_Alltoall, could be variable
3101  // using MPI_Alltoallv
3102  const int size_per_proc =
3103  cast_int<int>(buf.size()/this->size());
3104  libmesh_ignore(size_per_proc);
3105 
3106  libmesh_assert_equal_to (buf.size()%this->size(), 0);
3107 
3108  libmesh_assert(this->verify(size_per_proc));
3109 
3110  StandardType<T> send_type(buf.data());
3111 
3112  libmesh_call_mpi
3113  (MPI_Alltoall (MPI_IN_PLACE, size_per_proc, send_type, buf.data(),
3114  size_per_proc, send_type, this->get()));
3115 }
3116 
3117 
3118 
3119 template <typename T>
3120 inline void Communicator::broadcast (T & libmesh_mpi_var(data), const unsigned int root_id) const
3121 {
3122  libmesh_ignore(root_id); // Only needed for MPI and/or dbg/devel
3123  if (this->size() == 1)
3124  {
3125  libmesh_assert (!this->rank());
3126  libmesh_assert (!root_id);
3127  return;
3128  }
3129 
3130  libmesh_assert_less (root_id, this->size());
3131 
3132  LOG_SCOPE("broadcast()", "Parallel");
3133 
3134  // Spread data to remote processors.
3135  libmesh_call_mpi
3136  (MPI_Bcast (&data, 1, StandardType<T>(&data), root_id,
3137  this->get()));
3138 }
3139 
3140 
3141 
3142 template <typename Context, typename Iter, typename OutputIter>
3143 inline void Communicator::gather_packed_range(const unsigned int root_id,
3144  Context * context,
3145  Iter range_begin,
3146  const Iter range_end,
3147  OutputIter out_iter) const
3148 {
3149  typedef typename std::iterator_traits<Iter>::value_type T;
3150  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
3151 
3152  bool nonempty_range = (range_begin != range_end);
3153  this->max(nonempty_range);
3154 
3155  while (nonempty_range)
3156  {
3157  // We will serialize variable size objects from *range_begin to
3158  // *range_end as a sequence of ints in this buffer
3159  std::vector<buffer_t> buffer;
3160 
3161  range_begin = Parallel::pack_range
3162  (context, range_begin, range_end, buffer);
3163 
3164  this->gather(root_id, buffer);
3165 
3167  (buffer, context, out_iter, (T*)(nullptr));
3168 
3169  nonempty_range = (range_begin != range_end);
3170  this->max(nonempty_range);
3171  }
3172 }
3173 
3174 
3175 template <typename Context, typename Iter, typename OutputIter>
3176 inline void Communicator::allgather_packed_range(Context * context,
3177  Iter range_begin,
3178  const Iter range_end,
3179  OutputIter out_iter) const
3180 {
3181  typedef typename std::iterator_traits<Iter>::value_type T;
3182  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
3183 
3184  bool nonempty_range = (range_begin != range_end);
3185  this->max(nonempty_range);
3186 
3187  while (nonempty_range)
3188  {
3189  // We will serialize variable size objects from *range_begin to
3190  // *range_end as a sequence of ints in this buffer
3191  std::vector<buffer_t> buffer;
3192 
3193  range_begin = Parallel::pack_range
3194  (context, range_begin, range_end, buffer);
3195 
3196  this->allgather(buffer, false);
3197 
3198  libmesh_assert(buffer.size());
3199 
3201  (buffer, context, out_iter, (T*)nullptr);
3202 
3203  nonempty_range = (range_begin != range_end);
3204  this->max(nonempty_range);
3205  }
3206 }
3207 
3208 
3209 } // namespace Parallel
3210 
3211 } // namespace libMesh
3212 
3213 #endif // LIBMESH_PARALLEL_IMPLEMENTATION_H
data_type dataplusint_type< double >()
void set_union(T &data, const unsigned int root_id) const
void send(const unsigned int dest_processor_id, const T &buf, const MessageTag &tag=no_tag) const
data_type dataplusint_type< long >()
Iter pack_range(const Context *context, Iter range_begin, const Iter range_end, typename std::vector< buffertype > &buffer, std::size_t approx_buffer_size=1000000)
Definition: packing.h:139
void send_receive(const unsigned int dest_processor_id, const T1 &send, const unsigned int source_processor_id, T2 &recv, const MessageTag &send_tag=no_tag, const MessageTag &recv_tag=any_tag) const
int source() const
Definition: status.h:133
void maxloc(T &r, unsigned int &max_id) const
data_type dataplusint_type< short int >()
data_type dataplusint_type< float >()
processor_id_type size() const
Definition: communicator.h:175
void send_packed_range(const unsigned int dest_processor_id, const Context *context, Iter range_begin, const Iter range_end, const MessageTag &tag=no_tag) const
static void set_lowest(T &)
Definition: attributes.h:45
void allgather(const T &send, std::vector< T, A > &recv) const
MPI_Request request
Definition: request.h:40
void receive_packed_range(const unsigned int dest_processor_id, Context *context, OutputIter out, const T *output_type, const MessageTag &tag=any_tag) const
void gather(const unsigned int root_id, const T &send, std::vector< T, A > &recv) const
void alltoall(std::vector< T, A > &r) const
void nonblocking_send_packed_range(const unsigned int dest_processor_id, const Context *context, Iter range_begin, const Iter range_end, Request &req, const MessageTag &tag=no_tag) const
MPI_Datatype data_type
Definition: data_type.h:46
IterBase * end
long double max(long double a, double b)
std::size_t packed_range_size(const Context *context, Iter range_begin, const Iter range_end)
Definition: packing.h:118
void libmesh_ignore(const Args &...)
void minloc(T &r, unsigned int &min_id) const
processor_id_type rank() const
Definition: communicator.h:173
void send_receive_packed_range(const unsigned int dest_processor_id, const Context1 *context1, RangeIter send_begin, const RangeIter send_end, const unsigned int source_processor_id, Context2 *context2, OutputIter out, const T *output_type, const MessageTag &send_tag=no_tag, const MessageTag &recv_tag=any_tag) const
void scatter(const std::vector< T, A > &data, T &recv, const unsigned int root_id=0) const
void nonblocking_receive_packed_range(const unsigned int src_processor_id, Context *context, OutputIter out, const T *output_type, Request &req, Status &stat, const MessageTag &tag=any_tag) const
void add_prior_request(const Request &req)
Definition: request.C:190
status probe(const unsigned int src_processor_id, const MessageTag &tag=any_tag) const
unsigned int size(const data_type &type) const
Definition: status.h:152
data_type dataplusint_type< long double >()
data_type dataplusint_type()
void allgather_packed_range(Context *context, Iter range_begin, const Iter range_end, OutputIter out) const
const MessageTag no_tag
Definition: message_tag.h:120
Status packed_range_probe(const unsigned int src_processor_id, const MessageTag &tag, bool &flag) const
static PetscErrorCode Mat * A
Status receive(const unsigned int dest_processor_id, T &buf, const MessageTag &tag=any_tag) const
const MessageTag any_tag
Definition: message_tag.h:115
void add_post_wait_work(PostWaitWork *work)
Definition: request.C:204
IterBase * data
static void set_highest(T &)
Definition: attributes.h:46
OStreamProxy out(std::cout)
void unpack_range(const typename std::vector< buffertype > &buffer, Context *context, OutputIter out, const T *output_type)
void gather_packed_range(const unsigned int root_id, Context *context, Iter range_begin, const Iter range_end, OutputIter out) const
long double min(long double a, double b)
static const bool has_min_max
Definition: attributes.h:44
void broadcast(T &data, const unsigned int root_id=0) const
void broadcast_packed_range(const Context *context1, Iter range_begin, const Iter range_end, OutputContext *context2, OutputIter out, const unsigned int root_id=0) const