19 #ifndef LIBMESH_PARALLEL_IMPLEMENTATION_H 20 #define LIBMESH_PARALLEL_IMPLEMENTATION_H 38 #include <type_traits> 43 #ifdef LIBMESH_HAVE_MPI 52 #endif // LIBMESH_HAVE_MPI 75 template <
typename T,
typename A1,
typename A2>
76 inline void pack_vector_bool(
const std::vector<bool,A1> & vec_in,
77 std::vector<T,A2> & vec_out)
79 unsigned int data_bits = 8*
sizeof(T);
80 std::size_t in_size = vec_in.size();
81 std::size_t out_size = in_size/data_bits + ((in_size%data_bits)?1:0);
83 vec_out.resize(out_size);
84 for (std::size_t i=0; i != in_size; ++i)
86 std::size_t index = i/data_bits;
87 std::size_t offset = i%data_bits;
88 vec_out[index] += (vec_in[i]?1:0) << offset;
/**
 * Unpacks the bits stored in the integer words of \p vec_in into the
 * entries of \p vec_out.  Entry \p i is read from word
 * \p i/(8*sizeof(T)) at bit position \p i%(8*sizeof(T)).
 *
 * \param vec_in  The packed words.
 * \param vec_out Must already have the desired number of entries; its
 *                size determines how many bits are read, and it is
 *                asserted to be consistent with vec_in.size().
 */
template <typename T, typename A1, typename A2>
inline void unpack_vector_bool(const std::vector<T,A1> & vec_in,
                               std::vector<bool,A2> & vec_out)
{
  const std::size_t data_bits = 8*sizeof(T);

  // The output is not resized here: its current size is the number
  // of bits to extract.
  const std::size_t out_size = vec_out.size();
  libmesh_assert_equal_to
    (out_size/data_bits + (out_size%data_bits?1:0), vec_in.size());

  for (std::size_t i=0; i != out_size; ++i)
    {
      const std::size_t index = i/data_bits;
      const std::size_t offset = i%data_bits;

      // Isolate the bit with shift-right-and-mask.  A "shift left,
      // then shift right" sequence only works when the left shift
      // truncates to the width of T, which integral promotion
      // prevents for any T narrower than int.
      vec_out[i] = ((vec_in[index] >> offset) & 1) != 0;
    }
}
114 #ifdef LIBMESH_HAVE_MPI 117 template <
typename T1,
typename T2,
typename A1,
typename A2,
typename A3,
typename A4>
118 inline void send_receive_vec_of_vec(
const unsigned int dest_processor_id,
119 const std::vector<std::vector<T1,A1>,A2> & send,
120 const unsigned int source_processor_id,
121 std::vector<std::vector<T2,A3>,A4> & recv,
126 LOG_SCOPE(
"send_receive()",
"Parallel");
128 if (dest_processor_id == comm.
rank() &&
129 source_processor_id == comm.
rank())
136 comm.
send (dest_processor_id, send,
request, send_tag);
137 comm.
receive (source_processor_id, recv, recv_tag);
141 #endif // LIBMESH_HAVE_MPI 153 #ifdef LIBMESH_HAVE_MPI 177 LOG_SCOPE(
"probe()",
"Parallel");
182 (MPI_Probe (src_processor_id, tag.
value(), this->
get(), &stat));
192 LOG_SCOPE(
"packed_range_probe()",
"Parallel");
194 libmesh_experimental();
200 libmesh_call_mpi(MPI_Iprobe(src_processor_id,
214 const std::basic_string<T> & buf,
217 LOG_SCOPE(
"send()",
"Parallel");
219 T * dataptr = buf.empty() ? nullptr :
const_cast<T *
>(buf.data());
223 MPI_Ssend : MPI_Send) (dataptr,
224 cast_int<int>(buf.size()),
233 template <
typename T>
235 const std::basic_string<T> & buf,
239 LOG_SCOPE(
"send()",
"Parallel");
241 T * dataptr = buf.empty() ? nullptr :
const_cast<T *
>(buf.data());
245 MPI_Issend : MPI_Isend) (dataptr,
246 cast_int<int>(buf.size()),
260 template <
typename T>
265 LOG_SCOPE(
"send()",
"Parallel");
267 T * dataptr =
const_cast<T*
> (&buf);
271 MPI_Ssend : MPI_Send) (dataptr,
281 template <
typename T>
287 LOG_SCOPE(
"send()",
"Parallel");
289 T * dataptr =
const_cast<T*
>(&buf);
293 MPI_Issend : MPI_Isend) (dataptr,
308 template <
typename T,
typename C,
typename A>
310 const std::set<T,C,A> & buf,
313 this->
send(dest_processor_id, buf,
319 template <
typename T,
typename C,
typename A>
321 const std::set<T,C,A> & buf,
323 const MessageTag & tag)
const 325 this->
send(dest_processor_id, buf,
326 StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), req, tag);
331 template <
typename T,
typename C,
typename A>
333 const std::set<T,C,A> & buf,
334 const DataType & type,
335 const MessageTag & tag)
const 337 LOG_SCOPE(
"send()",
"Parallel");
339 std::vector<T> vecbuf(buf.begin(), buf.end());
340 this->
send(dest_processor_id, vecbuf, type, tag);
345 template <
typename T,
typename C,
typename A>
347 const std::set<T,C,A> & buf,
348 const DataType & type,
350 const MessageTag & tag)
const 352 LOG_SCOPE(
"send()",
"Parallel");
356 std::vector<T> * vecbuf =
357 new std::vector<T,A>(buf.begin(), buf.end());
360 req.add_post_wait_work
361 (
new Parallel::PostWaitDeleteBuffer<std::vector<T,A>>(vecbuf));
363 this->
send(dest_processor_id, *vecbuf, type, req, tag);
368 template <
typename T,
typename A>
370 const std::vector<T,A> & buf,
371 const MessageTag & tag)
const 373 this->
send(dest_processor_id, buf,
374 StandardType<T>(buf.empty() ? nullptr : &buf.front()), tag);
379 template <
typename T,
typename A>
381 const std::vector<T,A> & buf,
383 const MessageTag & tag)
const 385 this->
send(dest_processor_id, buf,
386 StandardType<T>(buf.empty() ? nullptr : &buf.front()), req, tag);
391 template <
typename T,
typename A>
393 const std::vector<T,A> & buf,
394 const DataType & type,
395 const MessageTag & tag)
const 397 LOG_SCOPE(
"send()",
"Parallel");
401 MPI_Ssend : MPI_Send) (buf.empty() ? nullptr :
const_cast<T*
>(buf.data()),
402 cast_int<int>(buf.size()),
411 template <
typename T,
typename A>
413 const std::vector<T,A> & buf,
414 const DataType & type,
416 const MessageTag & tag)
const 418 LOG_SCOPE(
"send()",
"Parallel");
422 MPI_Issend : MPI_Isend) (buf.empty() ? nullptr :
const_cast<T*
>(buf.data()),
423 cast_int<int>(buf.size()),
431 req.add_post_wait_work
432 (
new Parallel::PostWaitDereferenceTag(tag));
437 template <
typename T,
typename A1,
typename A2>
439 const std::vector<std::vector<T,A1>,A2> & buf,
440 const MessageTag & tag)
const 442 this->
send(dest_processor_id, buf,
443 StandardType<T>((buf.empty() || buf.front().empty()) ?
444 nullptr : &(buf.front().front())), tag);
449 template <
typename T,
typename A1,
typename A2>
451 const std::vector<std::vector<T,A1>,A2> & buf,
453 const MessageTag & tag)
const 455 this->
send(dest_processor_id, buf,
456 StandardType<T>((buf.empty() || buf.front().empty()) ?
457 nullptr : &(buf.front().front())), req, tag);
462 template <
typename T,
typename A1,
typename A2>
464 const std::vector<std::vector<T,A1>,A2> & buf,
465 const DataType & type,
466 const MessageTag & tag)
const 471 this->
send(dest_processor_id, buf, type, req, tag);
477 template <
typename T,
typename A1,
typename A2>
479 const std::vector<std::vector<T,A1>,A2> & send_vecs,
480 const DataType & type,
482 const MessageTag & tag)
const 486 std::vector<char> * sendbuf =
new std::vector<char>();
498 int sendsize = packedsize;
500 const std::size_t n_vecs = send_vecs.size();
502 for (std::size_t i = 0; i != n_vecs; ++i)
511 sendsize += packedsize;
515 (MPI_Pack_size (libMesh::cast_int<int>(send_vecs[i].
size()), type,
516 this->
get(), &packedsize));
518 sendsize += packedsize;
521 libmesh_assert (sendsize );
522 sendbuf->resize (sendsize);
528 const int mpi_n_vecs = libMesh::cast_int<int>(n_vecs);
531 (MPI_Pack (&mpi_n_vecs, 1,
533 sendbuf->data(), sendsize, &pos, this->
get()));
535 for (std::size_t i = 0; i != n_vecs; ++i)
538 const int subvec_size = libMesh::cast_int<int>(send_vecs[i].size());
542 sendbuf->data(), sendsize, &pos, this->
get()));
545 if (!send_vecs[i].empty())
547 (MPI_Pack (const_cast<T*>(send_vecs[i].
data()),
548 libMesh::cast_int<int>(subvec_size), type,
549 sendbuf->data(), sendsize, &pos, this->
get()));
552 libmesh_assert_equal_to (pos, sendsize);
554 req.add_post_wait_work
555 (
new Parallel::PostWaitDeleteBuffer<std::vector<char>> (sendbuf));
557 this->
send (dest_processor_id, *sendbuf, MPI_PACKED, req, tag);
561 template <
typename Context,
typename Iter>
563 const Context * context,
565 const Iter range_end,
570 typedef typename std::iterator_traits<Iter>::value_type T;
572 std::size_t total_buffer_size =
575 this->
send(dest_processor_id, total_buffer_size, tag);
578 std::size_t used_buffer_size = 0;
581 while (range_begin != range_end)
583 libmesh_assert_greater (std::distance(range_begin, range_end), 0);
585 std::vector<typename Parallel::Packing<T>::buffer_type> buffer;
588 (context, range_begin, range_end, buffer);
590 libmesh_assert_greater (std::distance(range_begin, next_range_begin), 0);
592 range_begin = next_range_begin;
595 used_buffer_size += buffer.size();
599 this->
send(dest_processor_id, buffer, tag);
603 libmesh_assert_equal_to(used_buffer_size, total_buffer_size);
608 template <
typename Context,
typename Iter>
610 const Context * context,
612 const Iter range_end,
618 typedef typename std::iterator_traits<Iter>::value_type T;
621 std::size_t total_buffer_size =
627 std::size_t * total_buffer_size_buffer =
new std::size_t;
628 *total_buffer_size_buffer = total_buffer_size;
634 this->
send(dest_processor_id, *total_buffer_size_buffer, intermediate_req, tag);
641 std::size_t used_buffer_size = 0;
644 while (range_begin != range_end)
646 libmesh_assert_greater (std::distance(range_begin, range_end), 0);
648 std::vector<buffer_t> * buffer =
new std::vector<buffer_t>();
650 const Iter next_range_begin =
654 libmesh_assert_greater (std::distance(range_begin, next_range_begin), 0);
656 range_begin = next_range_begin;
659 used_buffer_size += buffer->size();
664 Request * my_req = (range_begin == range_end) ? &req : &next_intermediate_req;
672 this->
send(dest_processor_id, *buffer, *my_req, tag);
674 if (range_begin != range_end)
685 template <
typename Context,
typename Iter>
687 const Context * context,
689 const Iter range_end,
693 libmesh_experimental();
697 typedef typename std::iterator_traits<Iter>::value_type T;
700 if (range_begin != range_end)
702 std::vector<buffer_t> * buffer =
new std::vector<buffer_t>();
712 if (range_begin != range_end)
721 this->
send(dest_processor_id, *buffer, req, tag);
726 template <
typename T>
728 std::basic_string<T> & buf,
731 std::vector<T> tempbuf;
735 buf.assign(tempbuf.begin(), tempbuf.end());
741 template <
typename T>
743 std::basic_string<T> & buf,
750 std::vector<T> * tempbuf =
new std::vector<T>();
758 std::back_insert_iterator<std::basic_string<T>>>
759 (tempbuf, std::back_inserter(buf)));
765 this->
receive(src_processor_id, tempbuf, req, tag);
770 template <
typename T>
775 LOG_SCOPE(
"receive()",
"Parallel");
783 tag.
value(), this->
get(), stat.
get()));
790 template <
typename T>
796 LOG_SCOPE(
"receive()",
"Parallel");
800 tag.
value(), this->
get(), req.
get()));
809 template <
typename T,
typename C,
typename A>
811 std::set<T,C,A> & buf,
815 (src_processor_id, buf,
826 template <
typename T,
typename C,
typename A>
828 std::set<T,C,A> & buf,
830 const MessageTag & tag)
const 832 this->
receive (src_processor_id, buf,
833 StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), req, tag);
839 template <
typename T,
typename C,
typename A>
841 std::set<T,C,A> & buf,
842 const DataType & type,
843 const MessageTag & tag)
const 845 LOG_SCOPE(
"receive()",
"Parallel");
847 std::vector<T> vecbuf;
848 Status stat = this->
receive(src_processor_id, vecbuf, type, tag);
850 buf.insert(vecbuf.begin(), vecbuf.end());
862 template <
typename T,
typename C,
typename A>
864 std::set<T,C,A> & buf,
865 const DataType & type,
867 const MessageTag & tag)
const 869 LOG_SCOPE(
"receive()",
"Parallel");
873 std::vector<T> * vecbuf =
new std::vector<T>();
879 req.add_post_wait_work
880 (
new Parallel::PostWaitCopyBuffer<std::vector<T>,
881 std::insert_iterator<std::set<T,C,A>>>
882 (*vecbuf, std::inserter(buf,buf.end())));
885 req.add_post_wait_work
886 (
new Parallel::PostWaitDeleteBuffer<std::vector<T>>(vecbuf));
888 this->
receive(src_processor_id, *vecbuf, type, req, tag);
894 template <
typename T,
typename A>
896 std::vector<T,A> & buf,
897 const MessageTag & tag)
const 900 (src_processor_id, buf,
901 StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), tag);
906 template <
typename T,
typename A>
908 std::vector<T,A> & buf,
910 const MessageTag & tag)
const 912 this->
receive (src_processor_id, buf,
913 StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), req, tag);
918 template <
typename T,
typename A>
920 std::vector<T,A> & buf,
921 const DataType & type,
922 const MessageTag & tag)
const 924 LOG_SCOPE(
"receive()",
"Parallel");
928 Status stat(this->
probe(src_processor_id, tag), type);
930 buf.resize(stat.size());
936 (MPI_Recv (buf.empty() ? nullptr : buf.data(),
937 cast_int<int>(buf.size()), type, stat.source(),
938 stat.tag(), this->
get(), stat.get()));
940 libmesh_assert_equal_to (stat.size(), buf.size());
947 template <
typename T,
typename A>
949 std::vector<T,A> & buf,
950 const DataType & type,
952 const MessageTag & tag)
const 954 LOG_SCOPE(
"receive()",
"Parallel");
957 (MPI_Irecv (buf.empty() ? nullptr : buf.data(),
958 cast_int<int>(buf.size()), type, src_processor_id,
959 tag.value(), this->
get(), req.get()));
962 req.add_post_wait_work
963 (
new Parallel::PostWaitDereferenceTag(tag));
968 template <
typename T,
typename A1,
typename A2>
970 std::vector<std::vector<T,A1>,A2> & buf,
971 const MessageTag & tag)
const 974 (src_processor_id, buf,
975 StandardType<T>((buf.empty() || buf.front().empty()) ?
976 nullptr : &(buf.front().front())), tag);
981 template <
typename T,
typename A1,
typename A2>
983 std::vector<std::vector<T,A1>,A2> & buf,
985 const MessageTag & tag)
const 987 this->
receive (src_processor_id, buf,
988 StandardType<T>((buf.empty() || buf.front().empty()) ?
989 nullptr : &(buf.front().front())), req, tag);
994 template <
typename T,
typename A1,
typename A2>
996 std::vector<std::vector<T,A1>,A2> & recv,
997 const DataType & type,
998 const MessageTag & tag)
const 1002 std::vector<char> recvbuf;
1004 Status stat = this->
receive (src_processor_id, recvbuf, MPI_PACKED, tag);
1007 libmesh_assert (!recvbuf.empty());
1010 int bufsize = libMesh::cast_int<int>(recvbuf.size());
1011 int recvsize, pos=0;
1013 (MPI_Unpack (recvbuf.data(), bufsize, &pos,
1018 recv.resize (recvsize);
1020 const std::size_t n_vecs = recvsize;
1021 for (std::size_t i = 0; i != n_vecs; ++i)
1026 (MPI_Unpack (recvbuf.data(), bufsize, &pos,
1032 recv[i].resize (subvec_size);
1035 if (!recv[i].empty())
1037 (MPI_Unpack (recvbuf.data(), bufsize, &pos, recv[i].data(),
1038 subvec_size, type, this->
get()));
1059 template <
typename Context,
typename OutputIter,
typename T>
1062 OutputIter out_iter,
1063 const T * output_type,
1069 std::size_t total_buffer_size = 0;
1070 Status stat = this->
receive(src_processor_id, total_buffer_size, tag);
1076 std::size_t received_buffer_size = 0;
1077 while (received_buffer_size < total_buffer_size)
1079 std::vector<buffer_t> buffer;
1081 received_buffer_size += buffer.size();
1083 (buffer, context, out_iter, output_type);
1116 template <
typename Context,
typename OutputIter,
typename T>
1125 libmesh_experimental();
1133 std::vector<buffer_t> * buffer =
new std::vector<buffer_t>(stat.
size());
1134 this->
receive(src_processor_id, *buffer, req, tag);
1147 template <
typename T1,
typename T2,
typename A1,
typename A2>
1149 const std::vector<T1,A1> & sendvec,
1151 const unsigned int source_processor_id,
1152 std::vector<T2,A2> & recv,
1157 LOG_SCOPE(
"send_receive()",
"Parallel");
1159 if (dest_processor_id == this->
rank() &&
1160 source_processor_id == this->
rank())
1168 this->
send (dest_processor_id, sendvec, type1, req, send_tag);
1170 this->
receive (source_processor_id, recv, type2, recv_tag);
1177 template <
typename T1,
typename T2>
1180 const unsigned int source_processor_id,
1185 LOG_SCOPE(
"send_receive()",
"Parallel");
1187 if (dest_processor_id == this->
rank() &&
1188 source_processor_id == this->
rank())
1199 dest_processor_id, send_tag.
value(), &recv, 1,
1201 recv_tag.
value(), this->
get(), MPI_STATUS_IGNORE));
1213 template <
typename T,
typename A>
1215 const std::vector<T,A> & sendvec,
1216 const unsigned int source_processor_id,
1217 std::vector<T,A> & recv,
1221 if (dest_processor_id == this->
rank() &&
1222 source_processor_id == this->
rank())
1224 LOG_SCOPE(
"send_receive()",
"Parallel");
1229 const T* example = sendvec.empty() ?
1230 (recv.empty() ? nullptr : recv.data()) : sendvec.data();
1236 source_processor_id, recv,
1238 send_tag, recv_tag);
1244 template <
typename T1,
typename T2,
typename A1,
typename A2>
1246 const std::vector<T1,A1> & sendvec,
1247 const unsigned int source_processor_id,
1248 std::vector<T2,A2> & recv,
1256 source_processor_id, recv,
1258 send_tag, recv_tag);
1264 template <
typename T1,
typename T2,
typename A1,
typename A2,
typename A3,
typename A4>
1266 const std::vector<std::vector<T1,A1>,A2> & sendvec,
1267 const unsigned int source_processor_id,
1268 std::vector<std::vector<T2,A3>,A4> & recv,
1269 const MessageTag & ,
1270 const MessageTag & )
const 1273 send_receive_vec_of_vec
1274 (dest_processor_id, sendvec, source_processor_id, recv,
1282 template <
typename T,
typename A1,
typename A2>
1284 const std::vector<std::vector<T,A1>,A2> & sendvec,
1285 const unsigned int source_processor_id,
1286 std::vector<std::vector<T,A1>,A2> & recv,
1287 const MessageTag & ,
1288 const MessageTag & )
const 1291 send_receive_vec_of_vec
1292 (dest_processor_id, sendvec, source_processor_id, recv,
1299 template <
typename Context1,
typename RangeIter,
typename Context2,
1300 typename OutputIter,
typename T>
1303 const Context1 * context1,
1304 RangeIter send_begin,
1305 const RangeIter send_end,
1306 const unsigned int source_processor_id,
1307 Context2 * context2,
1308 OutputIter out_iter,
1309 const T * output_type,
1313 LOG_SCOPE(
"send_receive()",
"Parallel");
1321 output_type, recv_tag);
1328 template <
typename Context,
typename Iter>
1330 const Context * context,
1332 const Iter range_end,
1334 std::shared_ptr<std::vector<
typename Parallel::Packing<
typename std::iterator_traits<Iter>::value_type>::buffer_type>> & buffer,
1337 libmesh_experimental();
1341 typedef typename std::iterator_traits<Iter>::value_type T;
1344 if (range_begin != range_end)
1346 if (buffer ==
nullptr)
1347 buffer = std::make_shared<std::vector<buffer_t>>();
1359 if (range_begin != range_end)
1367 this->
send(dest_processor_id, *buffer, req, tag);
1373 template <
typename T,
typename A>
1375 std::vector<std::basic_string<T>,
A> & recv,
1376 const bool identical_buffer_sizes)
const 1378 LOG_SCOPE (
"allgather()",
"Parallel");
1380 libmesh_assert(this->
size());
1381 recv.assign(this->
size(),
"");
1384 if (this->
size() < 2)
1392 sendlengths (this->
size(), 0),
1393 displacements(this->
size(), 0);
1395 const int mysize =
static_cast<int>(sendval.size());
1397 if (identical_buffer_sizes)
1398 sendlengths.assign(this->
size(), mysize);
1405 unsigned int globalsize = 0;
1406 for (
unsigned int i=0; i != this->
size(); ++i)
1408 displacements[i] = globalsize;
1409 globalsize += sendlengths[i];
1413 if (globalsize == 0)
1417 std::string r(globalsize, 0);
1421 (MPI_Allgatherv (const_cast<T*>(mysize ? sendval.data() :
nullptr),
1423 &r[0], sendlengths.data(), displacements.data(),
1427 for (
unsigned int i=0; i != this->
size(); ++i)
1428 recv[i] = r.substr(displacements[i], sendlengths[i]);
1436 if (this->
size() == 1)
1438 libmesh_assert (!this->
rank());
1439 libmesh_assert (!root_id);
1443 libmesh_assert_less (root_id, this->
size());
1445 LOG_SCOPE(
"broadcast()",
"Parallel");
1449 char char_data =
data;
1454 root_id, this->
get()));
1460 template <
typename T>
1462 const unsigned int root_id)
const 1464 if (this->
size() == 1)
1466 libmesh_assert (!this->
rank());
1467 libmesh_assert (!root_id);
1471 libmesh_assert_less (root_id, this->
size());
1473 LOG_SCOPE(
"broadcast()",
"Parallel");
1475 std::size_t data_size =
data.size();
1478 std::vector<T> data_c(data_size);
1480 std::string orig(
data);
1483 if (this->
rank() == root_id)
1484 for (std::size_t i=0; i<
data.size(); i++)
1485 data_c[i] =
data[i];
1489 data.assign(data_c.begin(), data_c.end());
1492 if (this->
rank() == root_id)
1493 libmesh_assert_equal_to (
data, orig);
1499 template <
typename T,
typename A>
1501 const unsigned int root_id)
const 1503 if (this->
size() == 1)
1505 libmesh_assert (!this->
rank());
1506 libmesh_assert (!root_id);
1510 libmesh_assert_less (root_id, this->
size());
1512 LOG_SCOPE(
"broadcast()",
"Parallel");
1516 T * data_ptr =
data.empty() ? nullptr :
data.data();
1519 (MPI_Bcast (data_ptr, cast_int<int>(
data.size()),
1520 StandardType<T>(data_ptr), root_id, this->
get()));
1524 template <
typename T,
typename A>
1526 const unsigned int root_id)
const 1528 if (this->
size() == 1)
1530 libmesh_assert (!this->
rank());
1531 libmesh_assert (!root_id);
1535 libmesh_assert_less (root_id, this->
size());
1537 LOG_SCOPE(
"broadcast()",
"Parallel");
1539 std::size_t bufsize=0;
1540 if (root_id == this->
rank())
1542 for (std::size_t i=0; i<
data.size(); ++i)
1548 std::vector<unsigned int> temp; temp.reserve(bufsize);
1550 if (root_id == this->
rank())
1552 for (std::size_t i=0; i<
data.size(); ++i)
1554 temp.push_back(cast_int<unsigned int>(
data[i].
size()));
1555 for (std::size_t j=0; j !=
data[i].size(); ++j)
1560 temp.push_back(
data[i][j]);
1564 temp.resize(bufsize);
1570 if (root_id != this->
rank())
1573 typename std::vector<unsigned int>::const_iterator iter = temp.begin();
1574 while (iter != temp.end())
1576 std::size_t curr_len = *iter++;
1577 data.push_back(std::string(iter, iter+curr_len));
1586 template <
typename T,
typename C,
typename A>
1588 const unsigned int root_id)
const 1590 if (this->
size() == 1)
1592 libmesh_assert (!this->
rank());
1593 libmesh_assert (!root_id);
1597 libmesh_assert_less (root_id, this->
size());
1599 LOG_SCOPE(
"broadcast()",
"Parallel");
1601 std::vector<T> vecdata;
1602 if (this->
rank() == root_id)
1603 vecdata.assign(
data.begin(),
data.end());
1605 std::size_t vecsize = vecdata.size();
1607 if (this->
rank() != root_id)
1608 vecdata.resize(vecsize);
1611 if (this->
rank() != root_id)
1614 data.insert(vecdata.begin(), vecdata.end());
1620 template <
typename T1,
typename T2,
typename C,
typename A>
1622 const unsigned int root_id)
const 1624 if (this->
size() == 1)
1626 libmesh_assert (!this->
rank());
1627 libmesh_assert (!root_id);
1631 libmesh_assert_less (root_id, this->
size());
1633 LOG_SCOPE(
"broadcast()",
"Parallel");
1635 std::size_t data_size=
data.size();
1638 std::vector<T1> pair_first; pair_first.reserve(data_size);
1639 std::vector<T2> pair_second; pair_first.reserve(data_size);
1641 if (root_id == this->
rank())
1643 for (
const auto & pr :
data)
1645 pair_first.push_back(pr.first);
1646 pair_second.push_back(pr.second);
1651 pair_first.resize(data_size);
1652 pair_second.resize(data_size);
1658 libmesh_assert(pair_first.size() == pair_first.size());
1660 if (this->
rank() != root_id)
1663 for (std::size_t i=0; i<pair_first.size(); ++i)
1664 data[pair_first[i]] = pair_second[i];
1670 template <
typename Context,
typename OutputContext,
1671 typename Iter,
typename OutputIter>
1674 const Iter range_end,
1675 OutputContext * context2,
1676 OutputIter out_iter,
1677 const unsigned int root_id)
const 1679 typedef typename std::iterator_traits<Iter>::value_type T;
1686 std::vector<buffer_t> buffer;
1688 if (this->
rank() == root_id)
1690 (context1, range_begin, range_end, buffer);
1694 std::size_t buffer_size = buffer.size();
1701 buffer.resize(buffer_size);
1706 if (this->
rank() != root_id)
1708 (buffer, context2, out_iter, (T*)
nullptr);
1714 template <
typename Context,
typename OutputIter,
typename T>
1724 libmesh_experimental();
1727 if (buffer ==
nullptr)
1728 buffer = std::make_shared<std::vector<typename Parallel::Packing<T>::buffer_type>>();
1736 buffer->resize(stat.
size());
1737 this->
receive(src_processor_id, *buffer, req, tag);
1749 #else // LIBMESH_HAVE_MPI 1756 { libmesh_not_implemented();
status s;
return s; }
1761 template <
typename T>
1764 const MessageTag &)
const 1765 { libmesh_not_implemented(); }
1767 template <
typename T>
1771 const MessageTag &)
const 1772 { libmesh_not_implemented(); }
1774 template <
typename T>
1779 { libmesh_not_implemented(); }
1781 template <
typename T>
1787 { libmesh_not_implemented(); }
1789 template <
typename Context,
typename Iter>
1795 { libmesh_not_implemented(); }
1797 template <
typename Context,
typename Iter>
1803 const MessageTag &)
const 1804 { libmesh_not_implemented(); }
1809 template <
typename T>
1812 const MessageTag &)
const 1813 { libmesh_not_implemented();
return Status(); }
1815 template <
typename T>
1819 const MessageTag &)
const 1820 { libmesh_not_implemented(); }
1822 template <
typename T>
1827 { libmesh_not_implemented();
return Status(); }
1829 template <
typename T>
1835 { libmesh_not_implemented(); }
1837 template <
typename Context,
typename OutputIter,
typename T>
1844 { libmesh_not_implemented(); }
1853 template <
typename T1,
typename T2>
1855 const T1 & send_val,
1856 const unsigned int recv_source,
1859 const MessageTag &)
const 1861 libmesh_assert_equal_to (send_tgt, 0);
1862 libmesh_assert_equal_to (recv_source, 0);
1863 recv_val = send_val;
1872 template <
typename Context1,
typename RangeIter,
1873 typename Context2,
typename OutputIter,
typename T>
1876 (
const unsigned int libmesh_dbg_var(dest_processor_id),
1877 const Context1 * context1,
1878 RangeIter send_begin,
1879 const RangeIter send_end,
1880 const unsigned int libmesh_dbg_var(source_processor_id),
1881 Context2 * context2,
1882 OutputIter out_iter,
1883 const T * output_type,
1889 libmesh_assert_equal_to(dest_processor_id, 0);
1890 libmesh_assert_equal_to(source_processor_id, 0);
1894 typedef typename std::iterator_traits<RangeIter>::value_type T1;
1897 while (send_begin != send_end)
1899 libmesh_assert_greater (std::distance(send_begin, send_end), 0);
1903 std::vector<buffer_t> buffer;
1906 (context1, send_begin, send_end, buffer);
1908 libmesh_assert_greater (std::distance(send_begin, next_send_begin), 0);
1910 send_begin = next_send_begin;
1913 (buffer, context2, out_iter, output_type);
1917 #endif // LIBMESH_HAVE_MPI 1927 template <
typename T>
1932 T tempmin = r, tempmax = r;
1935 bool verified = (r == tempmin) &&
1937 this->
min(verified);
1942 "Tried to verify an unverifiable type");
1947 template <
typename T>
1954 tempmin = tempmax = *r;
1962 bool invalid = r && ((*r != tempmin) ||
1969 "Tried to semiverify an unverifiable type");
1978 const unsigned char rnew = r;
1979 return this->
verify(rnew);
1988 const unsigned char rnew = *r;
1992 const unsigned char * rptr =
nullptr;
1998 template <
typename T,
typename A>
2003 std::size_t rsize = r ? r->size() : 0;
2004 std::size_t * psize = r ? &rsize :
nullptr;
2011 std::vector<T,A> tempmin, tempmax;
2014 tempmin = tempmax = *r;
2018 tempmin.resize(rsize);
2019 tempmax.resize(rsize);
2020 Attributes<std::vector<T,A>>::set_highest(tempmin);
2021 Attributes<std::vector<T,A>>::set_lowest(tempmax);
2025 bool invalid = r && ((*r != tempmin) ||
2032 "Tried to semiverify a vector of an unverifiable type");
2041 if (this->
size() > 1)
2045 std::vector<short int> temp; temp.reserve(r.size());
2046 for (std::size_t i=0; i != r.size(); ++i)
2047 temp.push_back(r[i]);
2048 return this->
verify(temp);
2057 if (this->
size() > 1)
2059 std::size_t rsize = r ? r->size() : 0;
2060 std::size_t * psize = r ? &rsize :
nullptr;
2069 std::vector<short int> temp (rsize);
2072 temp.reserve(rsize);
2073 for (std::size_t i=0; i != rsize; ++i)
2074 temp.push_back((*r)[i]);
2077 std::vector<short int> * ptemp = r ? &temp:
nullptr;
2086 template <
typename T>
2089 if (this->
size() > 1)
2091 LOG_SCOPE(
"min(scalar)",
"Parallel");
2094 (MPI_Allreduce (MPI_IN_PLACE, &r, 1,
2104 if (this->
size() > 1)
2106 LOG_SCOPE(
"min(bool)",
"Parallel");
2108 unsigned int temp = r;
2110 (MPI_Allreduce (MPI_IN_PLACE, &temp, 1,
2119 template <
typename T,
typename A>
2122 if (this->
size() > 1 && !r.empty())
2124 LOG_SCOPE(
"min(vector)",
"Parallel");
2126 libmesh_assert(this->
verify(r.size()));
2129 (MPI_Allreduce (MPI_IN_PLACE, r.data(),
2130 cast_int<int>(r.size()),
2131 StandardType<T>(r.data()),
2138 template <
typename A>
2141 if (this->
size() > 1 && !r.empty())
2143 LOG_SCOPE(
"min(vector<bool>)",
"Parallel");
2145 libmesh_assert(this->
verify(r.size()));
2147 std::vector<unsigned int> ruint;
2148 pack_vector_bool(r, ruint);
2149 std::vector<unsigned int> temp(ruint.size());
2151 (MPI_Allreduce (ruint.data(), temp.data(),
2152 cast_int<int>(ruint.size()),
2155 unpack_vector_bool(temp, r);
2160 template <
typename T>
2162 unsigned int & min_id)
const 2164 if (this->
size() > 1)
2166 LOG_SCOPE(
"minloc(scalar)",
"Parallel");
2173 (MPI_Allreduce (MPI_IN_PLACE, &data_in, 1, dataplusint_type<T>(),
2176 min_id = data_in.
rank;
2179 min_id = this->
rank();
2184 unsigned int & min_id)
const 2186 if (this->
size() > 1)
2188 LOG_SCOPE(
"minloc(bool)",
"Parallel");
2196 (MPI_Allreduce (&data_in, &data_out, 1,
2200 min_id = data_out.
rank;
2203 min_id = this->
rank();
2207 template <
typename T,
typename A1,
typename A2>
2209 std::vector<unsigned int,A2> & min_id)
const 2211 if (this->
size() > 1 && !r.empty())
2213 LOG_SCOPE(
"minloc(vector)",
"Parallel");
2215 libmesh_assert(this->
verify(r.size()));
2217 std::vector<DataPlusInt<T>> data_in(r.size());
2218 for (std::size_t i=0; i != r.size(); ++i)
2220 data_in[i].
val = r[i];
2223 std::vector<DataPlusInt<T>> data_out(r.size());
2225 (MPI_Allreduce (data_in.data(), data_out.data(),
2226 cast_int<int>(r.size()),
2227 dataplusint_type<T>(),
2229 for (std::size_t i=0; i != r.size(); ++i)
2231 r[i] = data_out[i].
val;
2232 min_id[i] = data_out[i].
rank;
2235 else if (!r.empty())
2237 for (std::size_t i=0; i != r.size(); ++i)
2238 min_id[i] = this->
rank();
2243 template <
typename A1,
typename A2>
2245 std::vector<unsigned int,A2> & min_id)
const 2247 if (this->
size() > 1 && !r.empty())
2249 LOG_SCOPE(
"minloc(vector<bool>)",
"Parallel");
2251 libmesh_assert(this->
verify(r.size()));
2253 std::vector<DataPlusInt<int>> data_in(r.size());
2254 for (std::size_t i=0; i != r.size(); ++i)
2256 data_in[i].
val = r[i];
2259 std::vector<DataPlusInt<int>> data_out(r.size());
2261 (MPI_Allreduce (data_in.data(), data_out.data(),
2262 cast_int<int>(r.size()),
2265 for (std::size_t i=0; i != r.size(); ++i)
2267 r[i] = data_out[i].
val;
2268 min_id[i] = data_out[i].
rank;
2271 else if (!r.empty())
2273 for (std::size_t i=0; i != r.size(); ++i)
2274 min_id[i] = this->
rank();
2279 template <
typename T>
2282 if (this->
size() > 1)
2284 LOG_SCOPE(
"max(scalar)",
"Parallel");
2287 (MPI_Allreduce (MPI_IN_PLACE, &r, 1,
2297 if (this->
size() > 1)
2299 LOG_SCOPE(
"max(bool)",
"Parallel");
2301 unsigned int temp = r;
2303 (MPI_Allreduce (MPI_IN_PLACE, &temp, 1,
2312 template <
typename T,
typename A>
2315 if (this->
size() > 1 && !r.empty())
2317 LOG_SCOPE(
"max(vector)",
"Parallel");
2319 libmesh_assert(this->
verify(r.size()));
2322 (MPI_Allreduce (MPI_IN_PLACE, r.data(),
2323 cast_int<int>(r.size()),
2324 StandardType<T>(r.data()),
2331 template <
typename A>
2334 if (this->
size() > 1 && !r.empty())
2336 LOG_SCOPE(
"max(vector<bool>)",
"Parallel");
2338 libmesh_assert(this->
verify(r.size()));
2340 std::vector<unsigned int> ruint;
2341 pack_vector_bool(r, ruint);
2342 std::vector<unsigned int> temp(ruint.size());
2344 (MPI_Allreduce (ruint.data(), temp.data(),
2345 cast_int<int>(ruint.size()),
2348 unpack_vector_bool(temp, r);
2353 template <
typename T>
2355 unsigned int & max_id)
const 2357 if (this->
size() > 1)
2359 LOG_SCOPE(
"maxloc(scalar)",
"Parallel");
2366 (MPI_Allreduce (MPI_IN_PLACE, &data_in, 1,
2367 dataplusint_type<T>(),
2371 max_id = data_in.
rank;
2374 max_id = this->
rank();
2379 unsigned int & max_id)
const 2381 if (this->
size() > 1)
2383 LOG_SCOPE(
"maxloc(bool)",
"Parallel");
2391 (MPI_Allreduce (&data_in, &data_out, 1,
2396 max_id = data_out.
rank;
2399 max_id = this->
rank();
2403 template <
typename T,
typename A1,
typename A2>
2405 std::vector<unsigned int,A2> & max_id)
const 2407 if (this->
size() > 1 && !r.empty())
2409 LOG_SCOPE(
"maxloc(vector)",
"Parallel");
2411 libmesh_assert(this->
verify(r.size()));
2413 std::vector<DataPlusInt<T>> data_in(r.size());
2414 for (std::size_t i=0; i != r.size(); ++i)
2416 data_in[i].
val = r[i];
2419 std::vector<DataPlusInt<T>> data_out(r.size());
2421 (MPI_Allreduce (data_in.data(), data_out.data(),
2422 cast_int<int>(r.size()),
2423 dataplusint_type<T>(),
2426 for (std::size_t i=0; i != r.size(); ++i)
2428 r[i] = data_out[i].
val;
2429 max_id[i] = data_out[i].
rank;
2432 else if (!r.empty())
2434 for (std::size_t i=0; i != r.size(); ++i)
2435 max_id[i] = this->
rank();
// Fragment of the std::vector<bool> overload of maxloc (per the LOG_SCOPE
// label). Same shape as the generic vector version, but the bools are carried
// as int values inside DataPlusInt<int> pairs.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the first signature line, the braces, line 2454 (presumably the
// rank assignment — confirm) and the datatype/operator arguments (2460-2461)
// are not visible here.
2440 template <
typename A1,
typename A2>
2442 std::vector<unsigned int,A2> & max_id)
const 2444 if (this->
size() > 1 && !r.empty())
2446 LOG_SCOPE(
"maxloc(vector<bool>)",
"Parallel");
2448 libmesh_assert(this->
verify(r.size()));
// One int-valued pair per bool in r.
2450 std::vector<DataPlusInt<int>> data_in(r.size());
2451 for (std::size_t i=0; i != r.size(); ++i)
2453 data_in[i].
val = r[i];
2456 std::vector<DataPlusInt<int>> data_out(r.size());
2458 (MPI_Allreduce (data_in.data(), data_out.data(),
2459 cast_int<int>(r.size()),
// Write reduced values back (int converts to bool on assignment to r[i]).
// NOTE(review): max_id is indexed without any visible resize — confirm the
// caller sizes it.
2463 for (std::size_t i=0; i != r.size(); ++i)
2465 r[i] = data_out[i].
val;
2466 max_id[i] = data_out[i].
rank;
// No reduction was performed (single rank): every maximum is local.
2469 else if (!r.empty())
2471 for (std::size_t i=0; i != r.size(); ++i)
2472 max_id[i] = this->
rank();
// Fragment of a scalar sum reduction (per the LOG_SCOPE label "sum()").
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the signature, the braces and the datatype/operator/communicator
// arguments of the MPI call (2486 onward) are not visible here.
2477 template <
typename T>
// Nothing to do on a single-rank communicator.
2480 if (this->
size() > 1)
2482 LOG_SCOPE(
"sum()",
"Parallel");
// In-place reduction of the single value r.
2485 (MPI_Allreduce (MPI_IN_PLACE, &r, 1,
// Fragment of a vector sum reduction (per the LOG_SCOPE label "sum()").
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the signature, the braces and the datatype/operator/communicator
// arguments of the MPI call (2505 onward) are not visible here.
2493 template <
typename T,
typename A>
// Reduce only when there is more than one rank and r has entries.
2496 if (this->
size() > 1 && !r.empty())
2498 LOG_SCOPE(
"sum()",
"Parallel");
2500 libmesh_assert(this->
verify(r.size()));
// In-place reduction over r.size() entries of r.
2503 (MPI_Allreduce (MPI_IN_PLACE, r.data(),
2504 cast_int<int>(r.size()),
// Fragment of a sum reduction that sends two items for the single object r.
// NOTE(review): presumably the std::complex<T> overload (real + imaginary
// part), which would explain the count of 2 — the signature is not visible,
// so confirm. Lossy extract: leading numbers look like original line numbers;
// the braces and the remaining MPI arguments (2523 onward) are missing.
2514 template <
typename T>
// Nothing to do on a single-rank communicator.
2517 if (this->
size() > 1)
2519 LOG_SCOPE(
"sum()",
"Parallel");
// In-place reduction of two consecutive items starting at &r.
2522 (MPI_Allreduce (MPI_IN_PLACE, &r, 2,
// Fragment of a vector sum reduction that sends two items per entry of r.
// NOTE(review): presumably the std::vector<std::complex<T>> overload, which
// would explain the factor of 2 — the signature is not visible, so confirm.
// Lossy extract: leading numbers look like original line numbers; the braces
// and the remaining MPI arguments (2542 onward) are missing.
2530 template <
typename T,
typename A>
// Reduce only when there is more than one rank and r has entries.
2533 if (this->
size() > 1 && !r.empty())
2535 LOG_SCOPE(
"sum()",
"Parallel");
2537 libmesh_assert(this->
verify(r.size()));
// In-place reduction over 2 * r.size() items.
2540 (MPI_Allreduce (MPI_IN_PLACE, r.data(),
2541 cast_int<int>(r.size() * 2),
// Fragment that merges every rank's `data` container into the copy held on
// root_id. Presumably the set_union overload for a std::set<T,C,A>-like
// container — the first signature line is not visible, so confirm.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the braces are missing.
2549 template <
typename T,
typename C,
typename A>
2551 const unsigned int root_id)
const 2553 if (this->
size() > 1)
// Flatten the container into a vector so it can go through gather().
2555 std::vector<T> vecdata(
data.begin(),
data.end());
2556 this->
gather(root_id, vecdata);
// Only the root inserts the gathered elements; other ranks keep `data` as is.
2557 if (this->
rank() == root_id)
2558 data.insert(vecdata.begin(), vecdata.end());
// Fragment that copies `data` into a vector and inserts the vector's contents
// back into `data`. Presumably the all-ranks set_union overload for a
// std::set<T,C,A>-like container, with a collective call on vecdata on the
// missing line 2570 — confirm.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the signature and the braces are not visible here.
2564 template <
typename T,
typename C,
typename A>
2567 if (this->
size() > 1)
2569 std::vector<T> vecdata(
data.begin(),
data.end());
// Every rank inserts (no root check in the visible code).
2571 data.insert(vecdata.begin(), vecdata.end());
// Fragment that merges every rank's key/value `data` container into the copy
// held on root_id. Presumably the set_union overload for a
// std::map<T1,T2,C,A>-like container — the first signature line is not
// visible, so confirm.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the braces are missing.
2577 template <
typename T1,
typename T2,
typename C,
typename A>
2579 const unsigned int root_id)
const 2581 if (this->
size() > 1)
// Flatten to a vector of pairs so it can go through gather().
2583 std::vector<std::pair<T1,T2>> vecdata(
data.begin(),
data.end());
2584 this->
gather(root_id, vecdata);
// Only the root inserts the gathered pairs.
2585 if (this->
rank() == root_id)
2586 data.insert(vecdata.begin(), vecdata.end());
// Fragment that copies `data` into a vector of pairs and inserts that vector's
// contents back into `data`. Presumably the all-ranks set_union overload for
// a std::map<T1,T2,C,A>-like container, with a collective call on vecdata on
// the missing line 2598 — confirm.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the signature and the braces are not visible here.
2592 template <
typename T1,
typename T2,
typename C,
typename A>
2595 if (this->
size() > 1)
2597 std::vector<std::pair<T1,T2>> vecdata(
data.begin(),
data.end());
// Every rank inserts (no root check in the visible code).
2599 data.insert(vecdata.begin(), vecdata.end());
// Fragment of the gather overload that collects one value (sendval) per rank
// into recv on root_id.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the first signature lines, the braces, the declaration of
// send_type (around 2619) and whatever follows the MPI call (2625 onward,
// presumably the single-rank branch) are not visible here.
2605 template <
typename T,
typename A>
2608 std::vector<T,A> & recv)
const 2610 libmesh_assert_less (root_id, this->
size());
// Only the root needs room for one entry per rank.
2612 if (this->
rank() == root_id)
2613 recv.resize(this->
size());
2615 if (this->
size() > 1)
2617 LOG_SCOPE(
"gather()",
"Parallel");
// Send one item per rank. recv is passed as nullptr when it is empty, which
// in the visible code is the case on non-root ranks unless the caller
// pre-sized it.
2622 (MPI_Gather(const_cast<T*>(&sendval), 1, send_type,
2623 recv.empty() ? nullptr : recv.data(), 1, send_type,
2624 root_id, this->
get()));
// Fragment of the gather overload that concatenates each rank's vector r into
// r on root_id using per-rank lengths and displacements.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the first signature line, the braces, the type of sendlengths and
// displacements (2645), the step that fills sendlengths (2653-2655), the
// early-return bodies and most MPI_Gatherv arguments are not visible here.
2632 template <
typename T,
typename A>
2634 std::vector<T,A> & r)
const 2636 if (this->
size() == 1)
// On a single-rank communicator both the rank and the root must be 0.
2638 libmesh_assert (!this->
rank());
2639 libmesh_assert (!root_id);
2643 libmesh_assert_less (root_id, this->
size());
2646 sendlengths (this->
size(), 0),
2647 displacements(this->
size(), 0);
// NOTE(review): plain static_cast here, whereas neighbouring code uses
// cast_int<int> for size conversions — presumably cast_int range-checks;
// confirm whether the difference is intentional.
2649 const int mysize =
static_cast<int>(r.size());
2652 LOG_SCOPE(
"gather()",
"Parallel");
// Running sum of the per-rank lengths gives each rank's offset in the result.
2656 unsigned int globalsize = 0;
2657 for (
unsigned int i=0; i != this->
size(); ++i)
2659 displacements[i] = globalsize;
2660 globalsize += sendlengths[i];
// Nothing to gather anywhere (the body of this branch is not visible).
2664 if (globalsize == 0)
// Keep a copy of the local data as the send buffer, because r is resized on
// the root below to receive the concatenated result.
2668 std::vector<T,A> r_src(r);
2672 if (root_id == this->
rank())
2673 r.resize(globalsize);
2677 (MPI_Gatherv (r_src.empty() ? nullptr : r_src.data(), mysize,
2679 sendlengths.data(), displacements.data(),
// Fragment of the gather overload for strings: each rank contributes sendval
// and the root receives one string per rank in recv.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the first signature line, the braces, the type of sendlengths and
// displacements (2700), the declaration of the local buffer r (around 2721,
// presumably a std::basic_string<T> — confirm), several MPI_Gatherv arguments
// and anything after line 2737 are not visible here.
2685 template <
typename T,
typename A>
2687 const std::basic_string<T> & sendval,
2688 std::vector<std::basic_string<T>,
A> & recv,
2689 const bool identical_buffer_sizes)
const 2691 libmesh_assert_less (root_id, this->
size());
// Only the root needs one slot per rank.
2693 if (this->
rank() == root_id)
2694 recv.resize(this->
size());
2696 if (this->
size() > 1)
2698 LOG_SCOPE (
"gather()",
"Parallel");
2701 sendlengths (this->
size(), 0),
2702 displacements(this->
size(), 0);
2704 const int mysize =
static_cast<int>(sendval.size());
// With identical sizes every length equals the local one, so no communication
// is needed to learn them; otherwise (the `else` is not visible) the lengths
// are gathered to the root.
2706 if (identical_buffer_sizes)
2707 sendlengths.assign(this->
size(), mysize);
2710 this->
gather(root_id, mysize, sendlengths);
// Running sum of the lengths gives each rank's offset in the flat buffer.
2714 unsigned int globalsize = 0;
2715 for (
unsigned int i=0; i < this->
size(); ++i)
2717 displacements[i] = globalsize;
2718 globalsize += sendlengths[i];
// Only the root allocates the flat receive buffer.
2723 if (this->
rank() == root_id)
2724 r.resize(globalsize, 0);
2728 (MPI_Gatherv (const_cast<T*>(sendval.data()),
2730 this->
rank() == root_id ? &r[0] :
nullptr,
2731 sendlengths.data(), displacements.data(),
// Slice the flat buffer back into one string per rank.
2735 if (this->
rank() == root_id)
2736 for (
unsigned int i=0; i != this->
size(); ++i)
2737 recv[i] = r.substr(displacements[i], sendlengths[i]);
// Fragment of the allgather overload that collects one value (sendval) from
// every rank into recv on every rank.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the first signature line, the braces, the condition guarding the
// MPI call (around 2755), the declaration of send_type and the body of the
// trailing else-if (2764 onward) are not visible here.
2745 template <
typename T,
typename A>
2747 std::vector<T,A> & recv)
const 2749 LOG_SCOPE (
"allgather()",
"Parallel");
// The communicator must be non-empty; recv gets one slot per rank.
2751 libmesh_assert(this->
size());
2752 recv.resize(this->
size());
2754 unsigned int comm_size = this->
size();
// One item sent per rank, one item received from each rank.
2760 (MPI_Allgather (const_cast<T*>(&sendval), 1, send_type, recv.data(), 1,
2761 send_type, this->
get()));
// Fallback branch when the MPI path is not taken; presumably copies sendval
// into recv[0] for a single rank — body not visible, confirm.
2763 else if (comm_size > 0)
// Fragment of the allgather overload that concatenates every rank's vector r
// into r on every rank, with a fast path when all ranks send the same size.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the first signature line, the braces, the early-return bodies, the
// declarations of send_type, the type of sendlengths/displacements (2797),
// the step that fills sendlengths (2802-2805) and lines 2819-2825 are not
// visible here.
2769 template <
typename T,
typename A>
2771 const bool identical_buffer_sizes)
const 2773 if (this->
size() < 2)
2776 LOG_SCOPE(
"allgather()",
"Parallel");
2778 if (identical_buffer_sizes)
2783 libmesh_assert(this->
verify(r.size()));
// NOTE(review): r_src is created at the full result size and then used as the
// SEND buffer with r as the RECEIVE buffer. That only makes sense if the two
// vectors are swapped on a line missing from this extract (2786-2789) —
// confirm.
2785 std::vector<T,A> r_src(r.size()*this->
size());
2790 (MPI_Allgather (r_src.data(), cast_int<int>(r_src.size()),
2791 send_type, r.data(), cast_int<int>(r_src.size()),
2792 send_type, this->
get()));
// Variable-size path: per-rank lengths and offsets.
2798 sendlengths (this->
size(), 0),
2799 displacements(this->
size(), 0);
2801 const int mysize =
static_cast<int>(r.size());
// Running sum of the lengths gives each rank's offset in the result.
2806 unsigned int globalsize = 0;
2807 for (
unsigned int i=0; i != this->
size(); ++i)
2809 displacements[i] = globalsize;
2810 globalsize += sendlengths[i];
// Nothing to gather anywhere (branch body not visible).
2814 if (globalsize == 0)
// NOTE(review): same pattern as above — r_src is sized for the full result
// yet passed as the send buffer, so a swap with r presumably happens on a
// missing line (2819-2825) — confirm.
2818 std::vector<T,A> r_src(globalsize);
2826 (MPI_Allgatherv (r_src.empty() ? nullptr : r_src.data(), mysize,
2827 send_type, r.data(), sendlengths.data(),
2828 displacements.data(), send_type, this->
get()));
// Fragment of an allgather overload whose entries of r are string-like (each
// r[i] has size(), begin(), end() and assign()). Local strings are
// concatenated into one character buffer, exchanged, then split again using
// the gathered per-string lengths. Presumably the
// std::vector<std::basic_string<T>,A> overload — the signature is not
// visible, so confirm.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the braces, the early-return bodies, lines 2845-2853, the name of
// the call at 2866, the declaration of send_type and the pointer advance at
// the end of the final loop (around 2914) are not visible here.
2833 template <
typename T,
typename A>
2835 const bool identical_buffer_sizes)
const 2837 if (this->
size() < 2)
2840 LOG_SCOPE(
"allgather()",
"Parallel");
2842 if (identical_buffer_sizes)
2844 libmesh_assert(this->
verify(r.size()));
// Record each local string's length and the total number of characters.
2854 std::vector<int> mystrlengths (r.size());
2855 std::vector<T> concat_src;
2857 int myconcatsize = 0;
2858 for (
unsigned int i=0; i != r.size(); ++i)
2860 int stringlen = cast_int<int>(r[i].size());
2861 mystrlengths[i] = stringlen;
2862 myconcatsize += stringlen;
// Append every local string to the flat send buffer (the call name on line
// 2866 is missing; by its arguments presumably concat_src.insert).
2864 concat_src.reserve(myconcatsize);
2865 for (
unsigned int i=0; i != r.size(); ++i)
2867 (concat_src.end(), r[i].begin(), r[i].end());
// Exchange the per-string lengths; afterwards strlengths holds one entry per
// string across all ranks and r is resized to match.
2870 std::vector<int> strlengths = mystrlengths;
2871 this->
allgather(strlengths, identical_buffer_sizes);
2874 r.resize(strlengths.size());
// Exchange each rank's total character count.
2877 std::vector<int> concat_sizes;
2878 this->
allgather(myconcatsize, concat_sizes);
// Running sum of the character counts gives each rank's offset.
2882 std::vector<int> displacements(this->
size(), 0);
2883 unsigned int globalsize = 0;
2884 for (
unsigned int i=0; i != this->
size(); ++i)
2886 displacements[i] = globalsize;
2887 globalsize += concat_sizes[i];
// No characters anywhere (branch body not visible).
2891 if (globalsize == 0)
2896 std::vector<T> concat(globalsize);
2903 (MPI_Allgatherv (concat_src.empty() ?
2904 nullptr : concat_src.data(), myconcatsize,
2905 send_type, concat.data(), concat_sizes.data(),
2906 displacements.data(), send_type, this->
get()));
// Cut the flat receive buffer back into strings of the gathered lengths.
2909 const T * begin = concat.data();
2910 for (
unsigned int i=0; i != r.size(); ++i)
2912 const T *
end = begin + strlengths[i];
2913 r[i].assign(begin,
end);
// Fragment of a scatter overload; per the assertion below, the root supplies
// exactly one entry of `data` per rank. Presumably the overload that delivers
// a single T to each rank — the signature is not fully visible, so confirm.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the first signature lines, the braces, the single-rank assignment
// and the MPI call itself (2943 onward) are not visible here.
2920 template <
typename T,
typename A>
2923 const unsigned int root_id)
const 2926 libmesh_assert_less (root_id, this->
size());
// On the root, `data` must hold one entry per rank; other ranks are not
// checked.
2930 libmesh_assert (this->
rank() != root_id || this->
size() == data.size());
2932 if (this->
size() == 1)
// On a single-rank communicator both the rank and the root must be 0.
2934 libmesh_assert (!this->
rank());
2935 libmesh_assert (!root_id);
2940 LOG_SCOPE(
"scatter()",
"Parallel");
// Drop const from the send pointer (presumably because the MPI call takes a
// non-const buffer); nullptr when `data` is empty.
2942 T * data_ptr =
const_cast<T*
>(
data.empty() ? nullptr :
data.data());
// Fragment of the scatter overload that splits the root's `data` into equal
// chunks and delivers one chunk to each rank's recv.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the first signature line, the braces, lines 2974-2976 and the
// start of the MPI call (2982-2984) are not visible here.
2952 template <
typename T,
typename A>
2954 std::vector<T,A> & recv,
2955 const unsigned int root_id)
const 2957 libmesh_assert_less (root_id, this->
size());
2959 if (this->
size() == 1)
// Single rank: both rank and root must be 0, and recv is simply a copy of
// data.
2961 libmesh_assert (!this->
rank());
2962 libmesh_assert (!root_id);
2963 recv.assign(
data.begin(),
data.end());
2967 LOG_SCOPE(
"scatter()",
"Parallel");
// The root computes the chunk size; data must divide evenly among the ranks.
// NOTE(review): in the visible code only the root sets recv_buffer_size, so
// it is presumably shared with the other ranks on the missing lines
// 2974-2976 before the resize — confirm.
2969 int recv_buffer_size = 0;
2970 if (this->
rank() == root_id)
2972 libmesh_assert(
data.size() % this->
size() == 0);
2973 recv_buffer_size = cast_int<int>(
data.size() / this->
size());
2977 recv.resize(recv_buffer_size);
// Null-safe raw pointers for the MPI call; const is dropped from the send
// side.
2979 T * data_ptr =
const_cast<T*
>(
data.empty() ? nullptr :
data.data());
2980 T * recv_ptr = recv.empty() ? nullptr : recv.data();
2985 recv_ptr, recv_buffer_size,
StandardType<T>(recv_ptr), root_id, this->
get()));
// Fragment of the scatter overload that delivers counts[i] consecutive
// entries of the root's `data` to rank i.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the first signature line and the braces are not visible here.
// NOTE(review): `counts` is declared by value below (no `&`), so the vector
// is copied on every call; a const reference looks intended — confirm against
// the declaration before changing.
2990 template <
typename T,
typename A1,
typename A2>
2992 const std::vector<int,A2> counts,
2993 std::vector<T,A1> & recv,
2994 const unsigned int root_id)
const 2996 libmesh_assert_less (root_id, this->
size());
2998 if (this->
size() == 1)
// Single rank: both rank and root must be 0, and recv takes the first
// counts[0] entries.
3000 libmesh_assert (!this->
rank());
3001 libmesh_assert (!root_id);
3002 libmesh_assert (counts.size() == this->
size());
3003 recv.assign(
data.begin(),
data.begin() + counts[0]);
// Offsets are only computed on the root; other ranks keep zeros.
3007 std::vector<int,A2> displacements(this->
size(), 0);
3008 if (root_id == this->
rank())
3010 libmesh_assert(counts.size() == this->
size());
// Running sum of counts gives each rank's offset into data.
3013 unsigned int globalsize = 0;
3014 for (
unsigned int i=0; i < this->
size(); ++i)
3016 displacements[i] = globalsize;
3017 globalsize += counts[i];
// The counts must account for every entry of data.
3020 libmesh_assert(
data.size() == globalsize);
3023 LOG_SCOPE(
"scatter()",
"Parallel");
// Tell each rank how many entries it will receive by scattering the counts
// themselves.
3026 int recv_buffer_size = 0;
3027 this->
scatter(counts, recv_buffer_size, root_id);
3028 recv.resize(recv_buffer_size);
// Null-safe raw pointers for the MPI call; const is dropped from the send
// side.
3030 T * data_ptr =
const_cast<T*
>(
data.empty() ? nullptr :
data.data());
3031 int * count_ptr =
const_cast<int*
>(counts.empty() ? nullptr : counts.data());
3032 T * recv_ptr = recv.empty() ? nullptr : recv.data();
3037 (MPI_Scatterv (data_ptr, count_ptr, displacements.data(),
StandardType<T>(data_ptr),
3038 recv_ptr, recv_buffer_size,
StandardType<T>(recv_ptr), root_id, this->
get()));
// Fragment of the scatter overload whose root-side `data` holds one inner
// container per rank. The inner containers are flattened into stacked_data
// and forwarded to one of the flat scatter overloads.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the first signature line, the braces, the rest of the single-rank
// branch (3056-3058), lines 3074-3078 and the `else` before the final call
// are not visible here.
3043 template <
typename T,
typename A1,
typename A2>
3045 std::vector<T,A1> & recv,
3046 const unsigned int root_id,
3047 const bool identical_buffer_sizes)
const 3049 libmesh_assert_less (root_id, this->
size());
3051 if (this->
size() == 1)
// Single rank: both rank and root must be 0 and data must hold exactly one
// inner container.
3053 libmesh_assert (!this->
rank());
3054 libmesh_assert (!root_id);
3055 libmesh_assert (
data.size() == this->
size());
3060 std::vector<T,A1> stacked_data;
3061 std::vector<int> counts;
// Only the root flattens; other ranks pass empty stacked_data/counts on.
3063 if (root_id == this->
rank())
3065 libmesh_assert (
data.size() == this->
size());
// Per-rank counts are only needed when the chunk sizes differ.
3067 if (!identical_buffer_sizes)
3068 counts.resize(this->
size());
3070 for (std::size_t i=0; i <
data.size(); ++i)
3072 if (!identical_buffer_sizes)
3073 counts[i] = cast_int<int>(
data[i].size());
// Append rank i's chunk to the flat send buffer.
3079 std::copy(
data[i].begin(),
data[i].
end(), std::back_inserter(stacked_data));
// Equal chunks use the even-split overload; otherwise (the `else` is not
// visible) the counts-based overload is used.
3083 if (identical_buffer_sizes)
3084 this->
scatter(stacked_data, recv, root_id);
3086 this->
scatter(stacked_data, counts, recv, root_id);
// Fragment of alltoall (per the LOG_SCOPE label): buf is treated as size()
// equal chunks and exchanged in place.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the signature, the braces, the early-return body and the
// declaration of send_type (around 3110) are not visible here.
3091 template <
typename T,
typename A>
// Nothing to exchange on fewer than two ranks or with an empty buffer.
3094 if (this->
size() < 2 || buf.empty())
3097 LOG_SCOPE(
"alltoall()",
"Parallel");
3102 const int size_per_proc =
3103 cast_int<int>(buf.size()/this->
size());
// The buffer must split evenly across the ranks.
3106 libmesh_assert_equal_to (buf.size()%this->
size(), 0);
3108 libmesh_assert(this->
verify(size_per_proc));
// In-place exchange: buf is the receive buffer and, because of MPI_IN_PLACE,
// also the data source.
3113 (MPI_Alltoall (MPI_IN_PLACE, size_per_proc, send_type, buf.data(),
3114 size_per_proc, send_type, this->
get()));
// Fragment of broadcast (per the LOG_SCOPE label). Only the precondition
// checks are visible.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the signature, the braces, the rest of the single-rank branch and
// the whole MPI call (3133 onward) are not visible here.
3119 template <
typename T>
3123 if (this->
size() == 1)
// On a single-rank communicator both the rank and the root must be 0.
3125 libmesh_assert (!this->
rank());
3126 libmesh_assert (!root_id);
3130 libmesh_assert_less (root_id, this->
size());
3132 LOG_SCOPE(
"broadcast()",
"Parallel");
// Fragment that repeatedly serialises part of [range_begin, range_end) into a
// buffer, gathers the buffers on root_id and writes the decoded objects to
// out_iter. Presumably gather_packed_range — the signature start is not
// visible, so confirm.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the first signature lines, the braces, the definition of buffer_t
// and the names of the calls at 3161/3166 are not visible here. By their
// arguments those calls are presumably pack_range (advancing range_begin)
// and unpack_range — confirm.
3142 template <
typename Context,
typename Iter,
typename OutputIter>
3146 const Iter range_end,
3147 OutputIter out_iter)
const 3149 typedef typename std::iterator_traits<Iter>::value_type T;
// Reduce the flag across ranks so that all of them run the same number of
// loop iterations (presumably true if ANY rank still has data — confirm
// max(bool)).
3152 bool nonempty_range = (range_begin != range_end);
3153 this->
max(nonempty_range);
3155 while (nonempty_range)
3159 std::vector<buffer_t> buffer;
3162 (context, range_begin, range_end, buffer);
3164 this->
gather(root_id, buffer);
// The null T* argument presumably only selects the output type — confirm.
3167 (buffer, context, out_iter, (T*)(
nullptr));
// Re-evaluate after this round and agree again across ranks.
3169 nonempty_range = (range_begin != range_end);
3170 this->
max(nonempty_range);
// Fragment that repeatedly serialises part of [range_begin, range_end) into a
// buffer and writes decoded objects to out_iter. Presumably
// allgather_packed_range, with the collective exchange on the missing line
// 3196 — the signature start is not visible, so confirm.
// NOTE(review): lossy extract — leading numbers look like original line
// numbers; the first signature lines, the braces, the definition of buffer_t
// and the names of the calls at 3193/3200 (presumably pack_range and
// unpack_range — confirm) are not visible here.
3175 template <
typename Context,
typename Iter,
typename OutputIter>
3178 const Iter range_end,
3179 OutputIter out_iter)
const 3181 typedef typename std::iterator_traits<Iter>::value_type T;
// Reduce the flag across ranks so that all of them run the same number of
// loop iterations (presumably true if ANY rank still has data — confirm
// max(bool)).
3184 bool nonempty_range = (range_begin != range_end);
3185 this->
max(nonempty_range);
3187 while (nonempty_range)
3191 std::vector<buffer_t> buffer;
3194 (context, range_begin, range_end, buffer);
// The buffer must be non-empty at this point in every round.
3198 libmesh_assert(buffer.size());
// The null T* argument presumably only selects the output type — confirm.
3201 (buffer, context, out_iter, (T*)
nullptr);
// Re-evaluate after this round and agree again across ranks.
3203 nonempty_range = (range_begin != range_end);
3204 this->
max(nonempty_range);
3213 #endif // LIBMESH_PARALLEL_IMPLEMENTATION_H data_type dataplusint_type< double >()
void set_union(T &data, const unsigned int root_id) const
void send(const unsigned int dest_processor_id, const T &buf, const MessageTag &tag=no_tag) const
data_type dataplusint_type< long >()
Iter pack_range(const Context *context, Iter range_begin, const Iter range_end, typename std::vector< buffertype > &buffer, std::size_t approx_buffer_size=1000000)
void send_receive(const unsigned int dest_processor_id, const T1 &send, const unsigned int source_processor_id, T2 &recv, const MessageTag &send_tag=no_tag, const MessageTag &recv_tag=any_tag) const
void maxloc(T &r, unsigned int &max_id) const
data_type dataplusint_type< short int >()
data_type dataplusint_type< float >()
processor_id_type size() const
void send_packed_range(const unsigned int dest_processor_id, const Context *context, Iter range_begin, const Iter range_end, const MessageTag &tag=no_tag) const
static void set_lowest(T &)
void allgather(const T &send, std::vector< T, A > &recv) const
void receive_packed_range(const unsigned int dest_processor_id, Context *context, OutputIter out, const T *output_type, const MessageTag &tag=any_tag) const
void gather(const unsigned int root_id, const T &send, std::vector< T, A > &recv) const
void alltoall(std::vector< T, A > &r) const
void nonblocking_send_packed_range(const unsigned int dest_processor_id, const Context *context, Iter range_begin, const Iter range_end, Request &req, const MessageTag &tag=no_tag) const
long double max(long double a, double b)
std::size_t packed_range_size(const Context *context, Iter range_begin, const Iter range_end)
bool semiverify(const T *r) const
void libmesh_ignore(const Args &...)
void minloc(T &r, unsigned int &min_id) const
processor_id_type rank() const
void send_receive_packed_range(const unsigned int dest_processor_id, const Context1 *context1, RangeIter send_begin, const RangeIter send_end, const unsigned int source_processor_id, Context2 *context2, OutputIter out, const T *output_type, const MessageTag &send_tag=no_tag, const MessageTag &recv_tag=any_tag) const
void scatter(const std::vector< T, A > &data, T &recv, const unsigned int root_id=0) const
data_type dataplusint_type< int >()
void nonblocking_receive_packed_range(const unsigned int src_processor_id, Context *context, OutputIter out, const T *output_type, Request &req, Status &stat, const MessageTag &tag=any_tag) const
void add_prior_request(const Request &req)
status probe(const unsigned int src_processor_id, const MessageTag &tag=any_tag) const
unsigned int size(const data_type &type) const
data_type dataplusint_type< long double >()
data_type dataplusint_type()
void allgather_packed_range(Context *context, Iter range_begin, const Iter range_end, OutputIter out) const
Status packed_range_probe(const unsigned int src_processor_id, const MessageTag &tag, bool &flag) const
static PetscErrorCode Mat * A
Status receive(const unsigned int dest_processor_id, T &buf, const MessageTag &tag=any_tag) const
void add_post_wait_work(PostWaitWork *work)
static void set_highest(T &)
OStreamProxy out(std::cout)
void unpack_range(const typename std::vector< buffertype > &buffer, Context *context, OutputIter out, const T *output_type)
void gather_packed_range(const unsigned int root_id, Context *context, Iter range_begin, const Iter range_end, OutputIter out) const
long double min(long double a, double b)
static const bool has_min_max
void broadcast(T &data, const unsigned int root_id=0) const
void broadcast_packed_range(const Context *context1, Iter range_begin, const Iter range_end, OutputContext *context2, OutputIter out, const unsigned int root_id=0) const
SendMode send_mode() const
bool verify(const T &r) const