parallel_sort.C
// The libMesh Finite Element Library.
// Copyright (C) 2002-2018 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner

// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.

// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

// System Includes
#include <algorithm>
#include <iostream>

// Local Includes
#include "libmesh/parallel_sort.h"

#include "libmesh/libmesh_common.h"
#include "libmesh/parallel.h"
#include "libmesh/parallel_bin_sorter.h"
#include "libmesh/parallel_hilbert.h"
#include "libmesh/parallel_sync.h"

namespace libMesh
{


namespace Parallel {

// The Constructor sorts the local data using
// std::sort().  Therefore, the construction of
// a Parallel::Sort object takes O(n log n) time,
// where n is the length of _data.
template <typename KeyType, typename IdxType>
Sort<KeyType,IdxType>::Sort(const Parallel::Communicator & comm_in,
                            std::vector<KeyType> & d) :
  ParallelObject(comm_in),
  _n_procs(cast_int<processor_id_type>(comm_in.size())),
  _proc_id(cast_int<processor_id_type>(comm_in.rank())),
  _bin_is_sorted(false),
  _data(d)
{
  std::sort(_data.begin(), _data.end());

  // Allocate storage
  _local_bin_sizes.resize(_n_procs);
}



template <typename KeyType, typename IdxType>
void Sort<KeyType,IdxType>::sort()
{
  // Find the global data size.  The sorting
  // algorithms assume they have a range to
  // work with, so catch the degenerate cases here
  IdxType global_data_size = cast_int<IdxType>(_data.size());

  this->comm().sum (global_data_size);

  if (global_data_size < 2)
    {
      // the entire global range is either empty
      // or contains only one element
      _my_bin = _data;

      this->comm().allgather (static_cast<IdxType>(_my_bin.size()),
                              _local_bin_sizes);
    }
  else
    {
      if (this->n_processors() > 1)
        {
          this->binsort();
          this->communicate_bins();
        }
      else
        _my_bin = _data;

      this->sort_local_bin();
    }

  // Set sorted flag to true
  _bin_is_sorted = true;
}



template <typename KeyType, typename IdxType>
void Sort<KeyType,IdxType>::binsort()
{
  // Find the global min and max from all the
  // processors.
  std::vector<KeyType> global_min_max(2);

  // Insert the local min and max for this processor
  global_min_max[0] = -_data.front();
  global_min_max[1] =  _data.back();

  // Communicate to determine the global
  // min and max for all processors.
  this->comm().max(global_min_max);

  // Multiply the min by -1 to obtain the true min
  global_min_max[0] *= -1;

  // Bin-Sort based on the global min and max
  Parallel::BinSorter<KeyType> bs(this->comm(), _data);
  bs.binsort(_n_procs, global_min_max[1], global_min_max[0]);

  // Now save the local bin sizes in a vector so
  // we don't have to keep around the BinSorter.
  for (processor_id_type i=0; i<_n_procs; ++i)
    _local_bin_sizes[i] = bs.sizeof_bin(i);
}
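
// A minimal standalone sketch (not part of this file, disabled with
// #if 0) of the negate-and-max trick used in binsort() above: packing
// {-min, max} lets a single max-reduction recover both global extremes,
// since max(-min_i) == -min(min_i).  The use of plain MPI and the sample
// values here are illustrative assumptions; the trick requires a signed,
// negatable key type.
#if 0
#include <mpi.h>
#include <cstdio>

int main (int argc, char ** argv)
{
  MPI_Init(&argc, &argv);

  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // Pretend each rank's sorted local data spans [local_min, local_max].
  const double local_min = 1.0 + rank, local_max = 10.0 + rank;

  // Pack {-min, max} so one MPI_MAX reduction handles both extremes.
  double min_max[2] = {-local_min, local_max};
  MPI_Allreduce(MPI_IN_PLACE, min_max, 2, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);

  const double global_min = -min_max[0];
  const double global_max =  min_max[1];
  if (rank == 0)
    std::printf("global range: [%g, %g]\n", global_min, global_max);

  MPI_Finalize();
  return 0;
}
#endif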



#if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
// Full specialization for HilbertIndices, there is a fair amount of
// code duplication here that could potentially be consolidated with the
// above method
template <>
void Sort<Parallel::DofObjectKey,unsigned int>::binsort()
{
  // Find the global min and max from all the
  // processors.  Do this using MPI_Allreduce.
  Parallel::DofObjectKey
    local_min,  local_max,
    global_min, global_max;

  if (_data.empty())
    {
#ifdef LIBMESH_ENABLE_UNIQUE_ID
      local_min.first.rack0 = local_min.first.rack1 = local_min.first.rack2 = static_cast<Hilbert::inttype>(-1);
      local_min.second = std::numeric_limits<unique_id_type>::max();
      local_max.first.rack0 = local_max.first.rack1 = local_max.first.rack2 = 0;
      local_max.second = 0;
#else
      local_min.rack0 = local_min.rack1 = local_min.rack2 = static_cast<Hilbert::inttype>(-1);
      local_max.rack0 = local_max.rack1 = local_max.rack2 = 0;
#endif
    }
  else
    {
      local_min = _data.front();
      local_max = _data.back();
    }

  MPI_Op hilbert_max, hilbert_min;

  MPI_Op_create ((MPI_User_function*)dofobjectkey_max_op, true, &hilbert_max);
  MPI_Op_create ((MPI_User_function*)dofobjectkey_min_op, true, &hilbert_min);

  // Communicate to determine the global
  // min and max for all processors.
  MPI_Allreduce(&local_min,
                &global_min,
                1,
                Parallel::StandardType<Parallel::DofObjectKey>(&local_min),
                hilbert_min,
                this->comm().get());

  MPI_Allreduce(&local_max,
                &global_max,
                1,
                Parallel::StandardType<Parallel::DofObjectKey>(&local_max),
                hilbert_max,
                this->comm().get());

  MPI_Op_free (&hilbert_max);
  MPI_Op_free (&hilbert_min);

  // Bin-Sort based on the global min and max
  Parallel::BinSorter<Parallel::DofObjectKey> bs(this->comm(),_data);
  bs.binsort(_n_procs, global_max, global_min);

  // Now save the local bin sizes in a vector so
  // we don't have to keep around the BinSorter.
  for (processor_id_type i=0; i<_n_procs; ++i)
    _local_bin_sizes[i] = bs.sizeof_bin(i);
}

#endif // #ifdef LIBMESH_HAVE_LIBHILBERT


template <typename KeyType, typename IdxType>
void Sort<KeyType,IdxType>::communicate_bins()
{
#ifdef LIBMESH_HAVE_MPI
  // Find each section of our data to send
  IdxType local_offset = 0;
  std::map<processor_id_type, std::vector<KeyType> > pushed_keys, received_keys;

  for (processor_id_type i=0; i != _n_procs; ++i)
    {
      IdxType next_offset = local_offset + _local_bin_sizes[i];
      if (_local_bin_sizes[i])
        {
          auto begin = _data.begin() + local_offset;
          auto end = _data.begin() + next_offset;
          pushed_keys[i].assign(begin, end);
        }

      local_offset = next_offset;
    }

  auto keys_action_functor =
    [& received_keys]
    (processor_id_type pid,
     const std::vector<KeyType> & keys)
    {
      received_keys[pid] = keys;
    };

  Parallel::push_parallel_vector_data
    (this->comm(), pushed_keys, keys_action_functor);

  std::size_t my_bin_size = 0;
  for (auto & p : received_keys)
    my_bin_size += p.second.size();

  _my_bin.clear();
  _my_bin.reserve(my_bin_size);

  for (auto & p : received_keys)
    _my_bin.insert(_my_bin.end(), p.second.begin(), p.second.end());

#ifdef DEBUG
  std::vector<IdxType> global_bin_sizes = _local_bin_sizes;

  this->comm().sum(global_bin_sizes);

  libmesh_assert_equal_to
    (global_bin_sizes[this->processor_id()], _my_bin.size());
#endif

#endif // LIBMESH_HAVE_MPI
}



template <typename KeyType, typename IdxType>
void Sort<KeyType,IdxType>::sort_local_bin()
{
  std::sort(_my_bin.begin(), _my_bin.end());
}



template <typename KeyType, typename IdxType>
const std::vector<KeyType> & Sort<KeyType,IdxType>::bin()
{
  if (!_bin_is_sorted)
    {
      libMesh::out << "Warning! Bin is not yet sorted!" << std::endl;
    }

  return _my_bin;
}

} // namespace Parallel



// Explicitly instantiate for int, double
template class Parallel::Sort<int, unsigned int>;
template class Parallel::Sort<double, unsigned int>;
#if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
template class Parallel::Sort<Parallel::DofObjectKey, unsigned int>;
#endif

} // namespace libMesh
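
// A usage sketch (illustrative only, not part of this file, disabled
// with #if 0) showing how a caller might drive Parallel::Sort.  The
// function name example_sort and the key values are hypothetical; the
// API calls follow the definitions above: construction sorts the local
// data, sort() performs the parallel bin sort, and bin() returns this
// processor's slice of the globally sorted sequence.
#if 0
#include "libmesh/parallel_sort.h"

#include <vector>

void example_sort (const libMesh::Parallel::Communicator & comm)
{
  using namespace libMesh;

  // Each processor contributes its own (unsorted) local keys.
  std::vector<int> local_keys = {42, 7, 19, 3};

  Parallel::Sort<int, unsigned int> sorter (comm, local_keys);
  sorter.sort();

  const std::vector<int> & my_bin = sorter.bin();
  (void)my_bin; // silence unused-variable warnings in this sketch
}
#endif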