dense_matrix_blas_lapack.C
Go to the documentation of this file.
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2018 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 // Local Includes
20 #include "libmesh/dense_matrix.h"
21 #include "libmesh/dense_vector.h"
22 #include "libmesh/int_range.h"
23 
24 #if (LIBMESH_HAVE_PETSC)
25 # include "libmesh/petsc_macro.h"
26 # include "libmesh/ignore_warnings.h"
27 # include <petscblaslapack.h>
29 #endif
30 
31 namespace libMesh
32 {
33 
34 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
35 
36 template<typename T>
39 {
40  int result_size = 0;
41 
42  // For each case, determine the size of the final result make sure
43  // that the inner dimensions match
44  switch (flag)
45  {
46  case LEFT_MULTIPLY:
47  {
48  result_size = other.m() * this->n();
49  if (other.n() == this->m())
50  break;
51  }
52  libmesh_fallthrough();
53  case RIGHT_MULTIPLY:
54  {
55  result_size = other.n() * this->m();
56  if (other.m() == this->n())
57  break;
58  }
59  libmesh_fallthrough();
60  case LEFT_MULTIPLY_TRANSPOSE:
61  {
62  result_size = other.n() * this->n();
63  if (other.m() == this->m())
64  break;
65  }
66  libmesh_fallthrough();
67  case RIGHT_MULTIPLY_TRANSPOSE:
68  {
69  result_size = other.m() * this->m();
70  if (other.n() == this->n())
71  break;
72  }
73  libmesh_fallthrough();
74  default:
75  libmesh_error_msg("Unknown flag selected or matrices are incompatible for multiplication.");
76  }
77 
78  // For this to work, the passed arg. must actually be a DenseMatrix<T>
79  const DenseMatrix<T> * const_that = cast_ptr<const DenseMatrix<T> *>(&other);
80 
81  // Also, although 'that' is logically const in this BLAS routine,
82  // the PETSc BLAS interface does not specify that any of the inputs are
83  // const. To use it, I must cast away const-ness.
84  DenseMatrix<T> * that = const_cast<DenseMatrix<T> *> (const_that);
85 
86  // Initialize A, B pointers for LEFT_MULTIPLY* cases
87  DenseMatrix<T> * A = this;
88  DenseMatrix<T> * B = that;
89 
90  // For RIGHT_MULTIPLY* cases, swap the meaning of A and B.
91  // Here is a full table of combinations we can pass to BLASgemm, and what the answer is when finished:
92  // pass A B -> (Fortran) -> A^T B^T -> (C++) -> (A^T B^T)^T -> (identity) -> B A "lt multiply"
93  // pass B A -> (Fortran) -> B^T A^T -> (C++) -> (B^T A^T)^T -> (identity) -> A B "rt multiply"
94  // pass A B^T -> (Fortran) -> A^T B -> (C++) -> (A^T B)^T -> (identity) -> B^T A "lt multiply t"
95  // pass B^T A -> (Fortran) -> B A^T -> (C++) -> (B A^T)^T -> (identity) -> A B^T "rt multiply t"
96  if (flag==RIGHT_MULTIPLY || flag==RIGHT_MULTIPLY_TRANSPOSE)
97  std::swap(A,B);
98 
99  // transa, transb values to pass to blas
100  char
101  transa[] = "n",
102  transb[] = "n";
103 
104  // Integer values to pass to BLAS:
105  //
106  // M
107  // In Fortran, the number of rows of op(A),
108  // In the BLAS documentation, typically known as 'M'.
109  //
110  // In C/C++, we set:
111  // M = n_cols(A) if (transa='n')
112  // n_rows(A) if (transa='t')
113  PetscBLASInt M = static_cast<PetscBLASInt>( A->n() );
114 
115  // N
116  // In Fortran, the number of cols of op(B), and also the number of cols of C.
117  // In the BLAS documentation, typically known as 'N'.
118  //
119  // In C/C++, we set:
120  // N = n_rows(B) if (transb='n')
121  // n_cols(B) if (transb='t')
122  PetscBLASInt N = static_cast<PetscBLASInt>( B->m() );
123 
124  // K
125  // In Fortran, the number of cols of op(A), and also
126  // the number of rows of op(B). In the BLAS documentation,
127  // typically known as 'K'.
128  //
129  // In C/C++, we set:
130  // K = n_rows(A) if (transa='n')
131  // n_cols(A) if (transa='t')
132  PetscBLASInt K = static_cast<PetscBLASInt>( A->m() );
133 
134  // LDA (leading dimension of A). In our cases,
135  // LDA is always the number of columns of A.
136  PetscBLASInt LDA = static_cast<PetscBLASInt>( A->n() );
137 
138  // LDB (leading dimension of B). In our cases,
139  // LDB is always the number of columns of B.
140  PetscBLASInt LDB = static_cast<PetscBLASInt>( B->n() );
141 
142  if (flag == LEFT_MULTIPLY_TRANSPOSE)
143  {
144  transb[0] = 't';
145  N = static_cast<PetscBLASInt>( B->n() );
146  }
147 
148  else if (flag == RIGHT_MULTIPLY_TRANSPOSE)
149  {
150  transa[0] = 't';
151  std::swap(M,K);
152  }
153 
154  // LDC (leading dimension of C). LDC is the
155  // number of columns in the solution matrix.
156  PetscBLASInt LDC = M;
157 
158  // Scalar values to pass to BLAS
159  //
160  // scalar multiplying the whole product AB
161  T alpha = 1.;
162 
163  // scalar multiplying C, which is the original matrix.
164  T beta = 0.;
165 
166  // Storage for the result
167  std::vector<T> result (result_size);
168 
169  // Finally ready to call the BLAS
170  BLASgemm_(transa, transb, &M, &N, &K, &alpha, A->get_values().data(), &LDA, B->get_values().data(), &LDB, &beta, result.data(), &LDC);
171 
172  // Update the relevant dimension for this matrix.
173  switch (flag)
174  {
175  case LEFT_MULTIPLY: { this->_m = other.m(); break; }
176  case RIGHT_MULTIPLY: { this->_n = other.n(); break; }
177  case LEFT_MULTIPLY_TRANSPOSE: { this->_m = other.n(); break; }
178  case RIGHT_MULTIPLY_TRANSPOSE: { this->_n = other.m(); break; }
179  default:
180  libmesh_error_msg("Unknown flag selected.");
181  }
182 
183  // Swap my data vector with the result
184  this->_val.swap(result);
185 }
186 
187 #else
188 
189 template<typename T>
191  _BLAS_Multiply_Flag)
192 {
193  libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
194 }
195 
196 #endif
197 
198 
199 
200 
201 
202 
203 
204 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
205 
206 template<typename T>
208 {
209  // If this function was called, there better not be any
210  // previous decomposition of the matrix.
211  libmesh_assert_equal_to (this->_decomposition_type, NONE);
212 
213  // The calling sequence for dgetrf is:
214  // dgetrf(M, N, A, lda, ipiv, info)
215 
216  // M (input)
217  // The number of rows of the matrix A. M >= 0.
218  // In C/C++, pass the number of *cols* of A
219  PetscBLASInt M = this->n();
220 
221  // N (input)
222  // The number of columns of the matrix A. N >= 0.
223  // In C/C++, pass the number of *rows* of A
224  PetscBLASInt N = this->m();
225 
226  // A (input/output) double precision array, dimension (LDA,N)
227  // On entry, the M-by-N matrix to be factored.
228  // On exit, the factors L and U from the factorization
229  // A = P*L*U; the unit diagonal elements of L are not stored.
230 
231  // LDA (input)
232  // The leading dimension of the array A. LDA >= max(1,M).
233  PetscBLASInt LDA = M;
234 
235  // ipiv (output) integer array, dimension (min(m,n))
236  // The pivot indices; for 1 <= i <= min(m,n), row i of the
237  // matrix was interchanged with row IPIV(i).
238  // Here, we pass _pivots.data(), a private class member used to store pivots
239  this->_pivots.resize( std::min(M,N) );
240 
241  // info (output)
242  // = 0: successful exit
243  // < 0: if INFO = -i, the i-th argument had an illegal value
244  // > 0: if INFO = i, U(i,i) is exactly zero. The factorization
245  // has been completed, but the factor U is exactly
246  // singular, and division by zero will occur if it is used
247  // to solve a system of equations.
248  PetscBLASInt INFO = 0;
249 
250  // Ready to call the actual factorization routine through PETSc's interface
251  LAPACKgetrf_(&M, &N, this->_val.data(), &LDA, _pivots.data(), &INFO);
252 
253  // Check return value for errors
254  if (INFO != 0)
255  libmesh_error_msg("INFO=" << INFO << ", Error during Lapack LU factorization!");
256 
257  // Set the flag for LU decomposition
258  this->_decomposition_type = LU_BLAS_LAPACK;
259 }
260 
261 #else
262 
263 template<typename T>
265 {
266  libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
267 }
268 
269 #endif
270 
271 
272 
273 template<typename T>
275 {
276  // The calling sequence for dgetrf is:
277  // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
278 
279  // JOBU (input)
280  // Specifies options for computing all or part of the matrix U:
281  // = 'A': all M columns of U are returned in array U:
282  // = 'S': the first min(m,n) columns of U (the left singular
283  // vectors) are returned in the array U;
284  // = 'O': the first min(m,n) columns of U (the left singular
285  // vectors) are overwritten on the array A;
286  // = 'N': no columns of U (no left singular vectors) are
287  // computed.
288  char JOBU = 'N';
289 
290  // JOBVT (input)
291  // Specifies options for computing all or part of the matrix
292  // V**T:
293  // = 'A': all N rows of V**T are returned in the array VT;
294  // = 'S': the first min(m,n) rows of V**T (the right singular
295  // vectors) are returned in the array VT;
296  // = 'O': the first min(m,n) rows of V**T (the right singular
297  // vectors) are overwritten on the array A;
298  // = 'N': no rows of V**T (no right singular vectors) are
299  // computed.
300  char JOBVT = 'N';
301 
302  std::vector<Real> sigma_val;
303  std::vector<Number> U_val;
304  std::vector<Number> VT_val;
305 
306  _svd_helper(JOBU, JOBVT, sigma_val, U_val, VT_val);
307 
308  // Copy the singular values into sigma, ignore U_val and VT_val
309  sigma.resize(cast_int<unsigned int>(sigma_val.size()));
310  for (auto i : IntRange<int>(0, sigma.size()))
311  sigma(i) = sigma_val[i];
312 }
313 
314 template<typename T>
317  DenseMatrix<Number> & VT)
318 {
319  // The calling sequence for dgetrf is:
320  // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
321 
322  // JOBU (input)
323  // Specifies options for computing all or part of the matrix U:
324  // = 'A': all M columns of U are returned in array U:
325  // = 'S': the first min(m,n) columns of U (the left singular
326  // vectors) are returned in the array U;
327  // = 'O': the first min(m,n) columns of U (the left singular
328  // vectors) are overwritten on the array A;
329  // = 'N': no columns of U (no left singular vectors) are
330  // computed.
331  char JOBU = 'S';
332 
333  // JOBVT (input)
334  // Specifies options for computing all or part of the matrix
335  // V**T:
336  // = 'A': all N rows of V**T are returned in the array VT;
337  // = 'S': the first min(m,n) rows of V**T (the right singular
338  // vectors) are returned in the array VT;
339  // = 'O': the first min(m,n) rows of V**T (the right singular
340  // vectors) are overwritten on the array A;
341  // = 'N': no rows of V**T (no right singular vectors) are
342  // computed.
343  char JOBVT = 'S';
344 
345  // Note: Lapack is going to compute the singular values of A^T. If
346  // A=U * S * V^T, then A^T = V * S * U^T, which means that the
347  // values returned in the "U_val" array actually correspond to the
348  // entries of the V matrix, and the values returned in the VT_val
349  // array actually correspond to the entries of U^T. Therefore, we
350  // pass VT in the place of U and U in the place of VT below!
351  std::vector<Real> sigma_val;
352  int M = this->n();
353  int N = this->m();
354  int min_MN = (M < N) ? M : N;
355 
356  // Size user-provided storage appropriately. Inside svd_helper:
357  // U_val is sized to (M x min_MN)
358  // VT_val is sized to (min_MN x N)
359  // So, we set up U to have the shape of "VT_val^T", and VT to
360  // have the shape of "U_val^T".
361  //
362  // Finally, since the results are stored in column-major order by
363  // Lapack, but we actually want the transpose of what Lapack
364  // returns, this means (conveniently) that we don't even have to
365  // copy anything after the call to _svd_helper, it should already be
366  // in the correct order!
367  U.resize(N, min_MN);
368  VT.resize(min_MN, M);
369 
370  _svd_helper(JOBU, JOBVT, sigma_val, VT.get_values(), U.get_values());
371 
372  // Copy the singular values into sigma.
373  sigma.resize(cast_int<unsigned int>(sigma_val.size()));
374  for (auto i : IntRange<int>(0, sigma.size()))
375  sigma(i) = sigma_val[i];
376 }
377 
378 #if (LIBMESH_HAVE_PETSC)
379 
380 template<typename T>
382  char JOBVT,
383  std::vector<Real> & sigma_val,
384  std::vector<Number> & U_val,
385  std::vector<Number> & VT_val)
386 {
387 
388  // M (input)
389  // The number of rows of the matrix A. M >= 0.
390  // In C/C++, pass the number of *cols* of A
391  PetscBLASInt M = this->n();
392 
393  // N (input)
394  // The number of columns of the matrix A. N >= 0.
395  // In C/C++, pass the number of *rows* of A
396  PetscBLASInt N = this->m();
397 
398  PetscBLASInt min_MN = (M < N) ? M : N;
399  PetscBLASInt max_MN = (M > N) ? M : N;
400 
401  // A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
402  // On entry, the M-by-N matrix A.
403  // On exit,
404  // if JOBU = 'O', A is overwritten with the first min(m,n)
405  // columns of U (the left singular vectors,
406  // stored columnwise);
407  // if JOBVT = 'O', A is overwritten with the first min(m,n)
408  // rows of V**T (the right singular vectors,
409  // stored rowwise);
410  // if JOBU != 'O' and JOBVT != 'O', the contents of A are destroyed.
411 
412  // LDA (input)
413  // The leading dimension of the array A. LDA >= max(1,M).
414  PetscBLASInt LDA = M;
415 
416  // S (output) DOUBLE PRECISION array, dimension (min(M,N))
417  // The singular values of A, sorted so that S(i) >= S(i+1).
418  sigma_val.resize( min_MN );
419 
420  // LDU (input)
421  // The leading dimension of the array U. LDU >= 1; if
422  // JOBU = 'S' or 'A', LDU >= M.
423  PetscBLASInt LDU = M;
424 
425  // U (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
426  // (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
427  // If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
428  // if JOBU = 'S', U contains the first min(m,n) columns of U
429  // (the left singular vectors, stored columnwise);
430  // if JOBU = 'N' or 'O', U is not referenced.
431  if (JOBU == 'S')
432  U_val.resize( LDU*min_MN );
433  else
434  U_val.resize( LDU*M );
435 
436  // LDVT (input)
437  // The leading dimension of the array VT. LDVT >= 1; if
438  // JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
439  PetscBLASInt LDVT = N;
440  if (JOBVT == 'S')
441  LDVT = min_MN;
442 
443  // VT (output) DOUBLE PRECISION array, dimension (LDVT,N)
444  // If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
445  // V**T;
446  // if JOBVT = 'S', VT contains the first min(m,n) rows of
447  // V**T (the right singular vectors, stored rowwise);
448  // if JOBVT = 'N' or 'O', VT is not referenced.
449  VT_val.resize( LDVT*N );
450 
451  // LWORK (input)
452  // The dimension of the array WORK.
453  // LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
454  // For good performance, LWORK should generally be larger.
455  //
456  // If LWORK = -1, then a workspace query is assumed; the routine
457  // only calculates the optimal size of the WORK array, returns
458  // this value as the first entry of the WORK array, and no error
459  // message related to LWORK is issued by XERBLA.
460  PetscBLASInt larger = (3*min_MN+max_MN > 5*min_MN) ? 3*min_MN+max_MN : 5*min_MN;
461  PetscBLASInt LWORK = (larger > 1) ? larger : 1;
462 
463 
464  // WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
465  // On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
466  // if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
467  // superdiagonal elements of an upper bidiagonal matrix B
468  // whose diagonal is in S (not necessarily sorted). B
469  // satisfies A = U * B * VT, so it has the same singular values
470  // as A, and singular vectors related by U and VT.
471  std::vector<Number> WORK( LWORK );
472 
473  // INFO (output)
474  // = 0: successful exit.
475  // < 0: if INFO = -i, the i-th argument had an illegal value.
476  // > 0: if DBDSQR did not converge, INFO specifies how many
477  // superdiagonals of an intermediate bidiagonal form B
478  // did not converge to zero. See the description of WORK
479  // above for details.
480  PetscBLASInt INFO = 0;
481 
482  // Ready to call the actual factorization routine through PETSc's interface.
483 #ifdef LIBMESH_USE_REAL_NUMBERS
484  // Note that the call to LAPACKgesvd_ may modify _val
485  LAPACKgesvd_(&JOBU, &JOBVT, &M, &N, _val.data(), &LDA, sigma_val.data(), U_val.data(),
486  &LDU, VT_val.data(), &LDVT, WORK.data(), &LWORK, &INFO);
487 #else
488  // When we have LIBMESH_USE_COMPLEX_NUMBERS then we must pass an array of Complex
489  // numbers to LAPACKgesvd_, but _val may contain Reals so we copy to Number below to
490  // handle both the real-valued and complex-valued cases.
491  std::vector<Number> val_copy(_val.size());
492  for (std::size_t i=0; i<_val.size(); i++)
493  val_copy[i] = _val[i];
494 
495  std::vector<Real> RWORK(5 * min_MN);
496  LAPACKgesvd_(&JOBU, &JOBVT, &M, &N, val_copy.data(), &LDA, sigma_val.data(), U_val.data(),
497  &LDU, VT_val.data(), &LDVT, WORK.data(), &LWORK, RWORK.data(), &INFO);
498 #endif
499 
500  // Check return value for errors
501  if (INFO != 0)
502  libmesh_error_msg("INFO=" << INFO << ", Error during Lapack SVD calculation!");
503 }
504 
505 
506 #else
507 
508 template<typename T>
509 void DenseMatrix<T>::_svd_helper (char,
510  char,
511  std::vector<Real> &,
512  std::vector<Number> &,
513  std::vector<Number> &)
514 {
515  libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
516 }
517 
518 #endif
519 
520 
521 
522 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
523 #if !PETSC_VERSION_LESS_THAN(3,1,0)
524 
525 template<typename T>
527  DenseVector<T> & x,
528  Real rcond) const
529 {
530  // Since BLAS is expecting column-major storage, we first need to
531  // make a transposed copy of *this, then pass it to the gelss
532  // routine instead of the original. This extra copy is kind of a
533  // bummer, it might be better if we could use the full SVD to
534  // compute the least-squares solution instead... Note that it isn't
535  // completely terrible either, since A_trans gets overwritten by
536  // Lapack, and we usually would end up making a copy of A outside
537  // the function call anyway.
538  DenseMatrix<T> A_trans;
539  this->get_transpose(A_trans);
540 
541  // M
542  // The number of rows of the input matrix. M >= 0.
543  // This is actually the number of *columns* of A_trans.
544  PetscBLASInt M = A_trans.n();
545 
546  // N
547  // The number of columns of the matrix A. N >= 0.
548  // This is actually the number of *rows* of A_trans.
549  PetscBLASInt N = A_trans.m();
550 
551  // We'll use the min and max of (M,N) several times below.
552  PetscBLASInt max_MN = std::max(M,N);
553  PetscBLASInt min_MN = std::min(M,N);
554 
555  // NRHS
556  // The number of right hand sides, i.e., the number of columns
557  // of the matrices B and X. NRHS >= 0.
558  // This could later be generalized to solve for multiple right-hand
559  // sides...
560  PetscBLASInt NRHS = 1;
561 
562  // A is double precision array, dimension (LDA,N)
563  // On entry, the M-by-N matrix A.
564  // On exit, the first min(m,n) rows of A are overwritten with
565  // its right singular vectors, stored rowwise.
566  //
567  // The data vector that will be passed to Lapack.
568  std::vector<T> & A_trans_vals = A_trans.get_values();
569 
570  // LDA
571  // The leading dimension of the array A. LDA >= max(1,M).
572  PetscBLASInt LDA = M;
573 
574  // B is double precision array, dimension (LDB,NRHS)
575  // On entry, the M-by-NRHS right hand side matrix B.
576  // On exit, B is overwritten by the N-by-NRHS solution
577  // matrix X. If m >= n and RANK = n, the residual
578  // sum-of-squares for the solution in the i-th column is given
579  // by the sum of squares of elements n+1:m in that column.
580  //
581  // Since we don't want the user's rhs vector to be overwritten by
582  // the solution, we copy the rhs values into the solution vector "x"
583  // now. x needs to be long enough to hold both the (Nx1) solution
584  // vector or the (Mx1) rhs, so size it to the max of those.
585  x.resize(max_MN);
586  for (auto i : IntRange<int>(0, rhs.size()))
587  x(i) = rhs(i);
588 
589  // Make the syntax below simpler by grabbing a reference to this array.
590  std::vector<T> & B = x.get_values();
591 
592  // LDB
593  // The leading dimension of the array B. LDB >= max(1,max(M,N)).
594  PetscBLASInt LDB = x.size();
595 
596  // S is double precision array, dimension (min(M,N))
597  // The singular values of A in decreasing order.
598  // The condition number of A in the 2-norm = S(1)/S(min(m,n)).
599  std::vector<T> S(min_MN);
600 
601  // RCOND
602  // Used to determine the effective rank of A. Singular values
603  // S(i) <= RCOND*S(1) are treated as zero. If RCOND < 0, machine
604  // precision is used instead.
605  Real RCOND = rcond;
606 
607  // RANK
608  // The effective rank of A, i.e., the number of singular values
609  // which are greater than RCOND*S(1).
610  PetscBLASInt RANK = 0;
611 
612  // LWORK
613  // The dimension of the array WORK. LWORK >= 1, and also:
614  // LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
615  // For good performance, LWORK should generally be larger.
616  //
617  // If LWORK = -1, then a workspace query is assumed; the routine
618  // only calculates the optimal size of the WORK array, returns
619  // this value as the first entry of the WORK array, and no error
620  // message related to LWORK is issued by XERBLA.
621  //
622  // The factor of 3/2 is arbitrary and is used to satisfy the "should
623  // generally be larger" clause.
624  PetscBLASInt LWORK = (3*min_MN + std::max(2*min_MN, std::max(max_MN, NRHS))) * 3/2;
625 
626  // WORK is double precision array, dimension (MAX(1,LWORK))
627  // On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
628  std::vector<T> WORK(LWORK);
629 
630  // INFO
631  // = 0: successful exit
632  // < 0: if INFO = -i, the i-th argument had an illegal value.
633  // > 0: the algorithm for computing the SVD failed to converge;
634  // if INFO = i, i off-diagonal elements of an intermediate
635  // bidiagonal form did not converge to zero.
636  PetscBLASInt INFO = 0;
637 
638  // LAPACKgelss_(const PetscBLASInt *, // M
639  // const PetscBLASInt *, // N
640  // const PetscBLASInt *, // NRHS
641  // PetscScalar *, // A
642  // const PetscBLASInt *, // LDA
643  // PetscScalar *, // B
644  // const PetscBLASInt *, // LDB
645  // PetscReal *, // S(out) = singular values of A in increasing order
646  // const PetscReal *, // RCOND = tolerance for singular values
647  // PetscBLASInt *, // RANK(out) = number of "non-zero" singular values
648  // PetscScalar *, // WORK
649  // const PetscBLASInt *, // LWORK
650  // PetscBLASInt *); // INFO
651  LAPACKgelss_(&M, &N, &NRHS, A_trans_vals.data(), &LDA, B.data(), &LDB, S.data(), &RCOND, &RANK, WORK.data(), &LWORK, &INFO);
652 
653  // Check for errors in the Lapack call
654  if (INFO < 0)
655  libmesh_error_msg("Error, argument " << -INFO << " to LAPACKgelss_ had an illegal value.");
656  if (INFO > 0)
657  libmesh_error_msg("The algorithm for computing the SVD failed to converge!");
658 
659  // Debugging: print singular values and information about condition number:
660  // libMesh::err << "RCOND=" << RCOND << std::endl;
661  // libMesh::err << "Singular values: " << std::endl;
662  // for (std::size_t i=0; i<S.size(); ++i)
663  // libMesh::err << S[i] << std::endl;
664  // libMesh::err << "The condition number of A is approximately: " << S[0]/S.back() << std::endl;
665 
666  // Lapack has already written the solution into B, but it will be
667  // the wrong size for non-square problems, so we need to resize it
668  // correctly. The size of the solution vector should be the number
669  // of columns of the original A matrix. Unfortunately, resizing a
670  // DenseVector currently also zeros it out (unlike a std::vector) so
671  // we'll resize the underlying storage directly (the size is not
672  // stored independently elsewhere).
673  x.get_values().resize(this->n());
674 }
675 
676 #else
677 
678 template<typename T>
680  DenseVector<T> & /*x*/,
681  Real /*rcond*/) const
682 {
683  libmesh_error_msg("svd_solve() requires PETSc >= 3.1!");
684 }
685 
686 #endif // !PETSC_VERSION_LESS_THAN(3,1,0)
687 
688 #else
689 template<typename T>
690 void DenseMatrix<T>::_svd_solve_lapack(const DenseVector<T>& /*rhs*/,
691  DenseVector<T> & /*x*/,
692  Real /*rcond*/) const
693 {
694  libmesh_not_implemented();
695 }
696 #endif // (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
697 
698 
699 
700 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
701 
702 template<typename T>
704  DenseVector<T> & lambda_imag,
705  DenseMatrix<T> * VL,
706  DenseMatrix<T> * VR)
707 {
708  // This algorithm only works for square matrices, so verify this up front.
709  if (this->m() != this->n())
710  libmesh_error_msg("Can only compute eigen-decompositions for square matrices.");
711 
712  // If the user requests left or right eigenvectors, we have to make
713  // sure and pass the transpose of this matrix to Lapack, otherwise
714  // it will compute the inverse transpose of what we are
715  // after... since we know the matrix is square, we can just swap
716  // entries in place. If the user does not request eigenvectors, we
717  // can skip this extra step, since the eigenvalues for A and A^T are
718  // the same.
719  if (VL || VR)
720  {
721  for (auto i : IntRange<int>(0, this->_m))
722  for (auto j : IntRange<int>(0, i))
723  std::swap((*this)(i,j), (*this)(j,i));
724  }
725 
726  // The calling sequence for dgeev is:
727  // DGEEV (character JOBVL,
728  // character JOBVR,
729  // integer N,
730  // double precision, dimension( lda, * ) A,
731  // integer LDA,
732  // double precision, dimension( * ) WR,
733  // double precision, dimension( * ) WI,
734  // double precision, dimension( ldvl, * ) VL,
735  // integer LDVL,
736  // double precision, dimension( ldvr, * ) VR,
737  // integer LDVR,
738  // double precision, dimension( * ) WORK,
739  // integer LWORK,
740  // integer INFO)
741 
742  // JOBVL (input)
743  // = 'N': left eigenvectors of A are not computed;
744  // = 'V': left eigenvectors of A are computed.
745  char JOBVL = VL ? 'V' : 'N';
746 
747  // JOBVR (input)
748  // = 'N': right eigenvectors of A are not computed;
749  // = 'V': right eigenvectors of A are computed.
750  char JOBVR = VR ? 'V' : 'N';
751 
752  // N (input)
753  // The number of rows/cols of the matrix A. N >= 0.
754  PetscBLASInt N = this->m();
755 
756  // A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
757  // On entry, the N-by-N matrix A.
758  // On exit, A has been overwritten.
759 
760  // LDA (input)
761  // The leading dimension of the array A. LDA >= max(1,N).
762  PetscBLASInt LDA = N;
763 
764  // WR (output) double precision array, dimension (N)
765  // WI (output) double precision array, dimension (N)
766  // WR and WI contain the real and imaginary parts,
767  // respectively, of the computed eigenvalues. Complex
768  // conjugate pairs of eigenvalues appear consecutively
769  // with the eigenvalue having the positive imaginary part
770  // first.
771  lambda_real.resize(N);
772  lambda_imag.resize(N);
773 
774  // VL (output) double precision array, dimension (LDVL,N)
775  // If JOBVL = 'V', the left eigenvectors u(j) are stored one
776  // after another in the columns of VL, in the same order
777  // as their eigenvalues.
778  // If JOBVL = 'N', VL is not referenced.
779  // If the j-th eigenvalue is real, then u(j) = VL(:,j),
780  // the j-th column of VL.
781  // If the j-th and (j+1)-st eigenvalues form a complex
782  // conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and
783  // u(j+1) = VL(:,j) - i*VL(:,j+1).
784  // Will be set below if needed.
785 
786  // LDVL (input)
787  // The leading dimension of the array VL. LDVL >= 1; if
788  // JOBVL = 'V', LDVL >= N.
789  PetscBLASInt LDVL = VL ? N : 1;
790 
791  // VR (output) DOUBLE PRECISION array, dimension (LDVR,N)
792  // If JOBVR = 'V', the right eigenvectors v(j) are stored one
793  // after another in the columns of VR, in the same order
794  // as their eigenvalues.
795  // If JOBVR = 'N', VR is not referenced.
796  // If the j-th eigenvalue is real, then v(j) = VR(:,j),
797  // the j-th column of VR.
798  // If the j-th and (j+1)-st eigenvalues form a complex
799  // conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and
800  // v(j+1) = VR(:,j) - i*VR(:,j+1).
801  // Will be set below if needed.
802 
803  // LDVR (input)
804  // The leading dimension of the array VR. LDVR >= 1; if
805  // JOBVR = 'V', LDVR >= N.
806  PetscBLASInt LDVR = VR ? N : 1;
807 
808  // WORK (workspace/output) double precision array, dimension (MAX(1,LWORK))
809  // On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
810  //
811  // LWORK (input)
812  // The dimension of the array WORK. LWORK >= max(1,3*N), and
813  // if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good
814  // performance, LWORK must generally be larger.
815  //
816  // If LWORK = -1, then a workspace query is assumed; the routine
817  // only calculates the optimal size of the WORK array, returns
818  // this value as the first entry of the WORK array, and no error
819  // message related to LWORK is issued by XERBLA.
820  PetscBLASInt LWORK = (VR || VL) ? 4*N : 3*N;
821  std::vector<T> WORK(LWORK);
822 
823  // INFO (output)
824  // = 0: successful exit
825  // < 0: if INFO = -i, the i-th argument had an illegal value.
826  // > 0: if INFO = i, the QR algorithm failed to compute all the
827  // eigenvalues, and no eigenvectors or condition numbers
828  // have been computed; elements 1:ILO-1 and i+1:N of WR
829  // and WI contain eigenvalues which have converged.
830  PetscBLASInt INFO = 0;
831 
832  // Get references to raw data
833  std::vector<T> & lambda_real_val = lambda_real.get_values();
834  std::vector<T> & lambda_imag_val = lambda_imag.get_values();
835 
836  // Set up eigenvector storage if necessary.
837  T * VR_ptr = nullptr;
838  if (VR)
839  {
840  VR->resize(N, N);
841  VR_ptr = VR->get_values().data();
842  }
843 
844  T * VL_ptr = nullptr;
845  if (VL)
846  {
847  VL->resize(N, N);
848  VL_ptr = VL->get_values().data();
849  }
850 
851  // Ready to call the Lapack routine through PETSc's interface
852  LAPACKgeev_(&JOBVL,
853  &JOBVR,
854  &N,
855  _val.data(),
856  &LDA,
857  lambda_real_val.data(),
858  lambda_imag_val.data(),
859  VL_ptr,
860  &LDVL,
861  VR_ptr,
862  &LDVR,
863  WORK.data(),
864  &LWORK,
865  &INFO);
866 
867  // Check return value for errors
868  if (INFO != 0)
869  libmesh_error_msg("INFO=" << INFO << ", Error during Lapack eigenvalue calculation!");
870 
871  // If the user requested either right or left eigenvectors, LAPACK
872  // has now computed the transpose of the desired matrix, i.e. V^T
873  // instead of V. We could leave this up to user code to handle, but
874  // rather than risking getting very unexpected results, we'll just
875  // transpose it in place before handing it back.
876  if (VR)
877  {
878  for (auto i : IntRange<int>(0, N))
879  for (auto j : IntRange<int>(0, i))
880  std::swap((*VR)(i,j), (*VR)(j,i));
881  }
882 
883  if (VL)
884  {
885  for (auto i : IntRange<int>(0, N))
886  for (auto j : IntRange<int>(0, i))
887  std::swap((*VL)(i,j), (*VL)(j,i));
888  }
889 }
890 
891 #else
892 
893 template<typename T>
895  DenseVector<T> &,
896  DenseMatrix<T> *,
897  DenseMatrix<T> *)
898 {
899  libmesh_error_msg("_evd_lapack is currently only available when LIBMESH_USE_REAL_NUMBERS is defined!");
900 }
901 
902 #endif
903 
904 
905 
906 
907 
908 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
909 
910 template<typename T>
912  DenseVector<T> & x)
913 {
914  // The calling sequence for getrs is:
915  // dgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO)
916 
917  // trans (input)
918  // 'n' for no transpose, 't' for transpose
919  char TRANS[] = "t";
920 
921  // N (input)
922  // The order of the matrix A. N >= 0.
923  PetscBLASInt N = this->m();
924 
925 
926  // NRHS (input)
927  // The number of right hand sides, i.e., the number of columns
928  // of the matrix B. NRHS >= 0.
929  PetscBLASInt NRHS = 1;
930 
931  // A (input) double precision array, dimension (LDA,N)
932  // The factors L and U from the factorization A = P*L*U
933  // as computed by dgetrf.
934 
935  // LDA (input)
936  // The leading dimension of the array A. LDA >= max(1,N).
937  PetscBLASInt LDA = N;
938 
939  // ipiv (input) int array, dimension (N)
940  // The pivot indices from DGETRF; for 1<=i<=N, row i of the
941  // matrix was interchanged with row IPIV(i).
942  // Here, we pass _pivots.data() which was computed in _lu_decompose_lapack
943 
944  // B (input/output) double precision array, dimension (LDB,NRHS)
945  // On entry, the right hand side matrix B.
946  // On exit, the solution matrix X.
947  // Here, we pass a copy of the rhs vector's data array in x, so that the
948  // passed right-hand side b is unmodified. I don't see a way around this
949  // copy if we want to maintain an unmodified rhs in LibMesh.
950  x = b;
951  std::vector<T> & x_vec = x.get_values();
952 
953  // We can avoid the copy if we don't care about overwriting the RHS: just
954  // pass b to the Lapack routine and then swap with x before exiting
955  // std::vector<T> & x_vec = b.get_values();
956 
957  // LDB (input)
958  // The leading dimension of the array B. LDB >= max(1,N).
959  PetscBLASInt LDB = N;
960 
961  // INFO (output)
962  // = 0: successful exit
963  // < 0: if INFO = -i, the i-th argument had an illegal value
964  PetscBLASInt INFO = 0;
965 
966  // Finally, ready to call the Lapack getrs function
967  LAPACKgetrs_(TRANS, &N, &NRHS, _val.data(), &LDA, _pivots.data(), x_vec.data(), &LDB, &INFO);
968 
969  // Check return value for errors
970  if (INFO != 0)
971  libmesh_error_msg("INFO=" << INFO << ", Error during Lapack LU solve!");
972 
973  // Don't do this if you already made a copy of b above
974  // Swap b and x. The solution will then be in x, and whatever was originally
975  // in x, maybe garbage, maybe nothing, will be in b.
976  // FIXME: Rewrite the LU and Cholesky solves to just take one input, and overwrite
977  // the input. This *should* make user code simpler, as they don't have to create
978  // an extra vector just to pass it in to the solve function!
979  // b.swap(x);
980 }
981 
982 #else
983 
984 template<typename T>
986  DenseVector<T> &)
987 {
988  libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
989 }
990 
991 #endif
992 
993 
994 
995 
996 
997 #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
998 
999 template<typename T>
1001  T beta,
1002  DenseVector<T> & dest,
1003  const DenseVector<T> & arg,
1004  bool trans) const
1005 {
1006  // Ensure that dest and arg sizes are compatible
1007  if (!trans)
1008  {
1009  // dest ~ A * arg
1010  // (mx1) (mxn) * (nx1)
1011  if ((dest.size() != this->m()) || (arg.size() != this->n()))
1012  libmesh_error_msg("Improper input argument sizes!");
1013  }
1014 
1015  else // trans == true
1016  {
1017  // Ensure that dest and arg are proper size
1018  // dest ~ A^T * arg
1019  // (nx1) (nxm) * (mx1)
1020  if ((dest.size() != this->n()) || (arg.size() != this->m()))
1021  libmesh_error_msg("Improper input argument sizes!");
1022  }
1023 
1024  // Calling sequence for dgemv:
1025  //
1026  // dgemv(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
1027 
1028  // TRANS (input)
1029  // 't' for transpose, 'n' for non-transpose multiply
1030  // We store everything in row-major order, so pass the transpose flag for
1031  // non-transposed matvecs and the 'n' flag for transposed matvecs
1032  char TRANS[] = "t";
1033  if (trans)
1034  TRANS[0] = 'n';
1035 
1036  // M (input)
1037  // On entry, M specifies the number of rows of the matrix A.
1038  // In C/C++, pass the number of *cols* of A
1039  PetscBLASInt M = this->n();
1040 
1041  // N (input)
1042  // On entry, N specifies the number of columns of the matrix A.
1043  // In C/C++, pass the number of *rows* of A
1044  PetscBLASInt N = this->m();
1045 
1046  // ALPHA (input)
1047  // The scalar constant passed to this function
1048 
1049  // A (input) double precision array of DIMENSION ( LDA, n ).
1050  // Before entry, the leading m by n part of the array A must
1051  // contain the matrix of coefficients.
1052  // The matrix, *this. Note that _matvec_blas is called from
1053  // a const function, vector_mult(), and so we have made this function const
1054  // as well. Since BLAS knows nothing about const, we have to cast it away
1055  // now.
1056  DenseMatrix<T> & a_ref = const_cast<DenseMatrix<T> &> ( *this );
1057  std::vector<T> & a = a_ref.get_values();
1058 
1059  // LDA (input)
1060  // On entry, LDA specifies the first dimension of A as declared
1061  // in the calling (sub) program. LDA must be at least
1062  // max( 1, m ).
1063  PetscBLASInt LDA = M;
1064 
1065  // X (input) double precision array of DIMENSION at least
1066  // ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
1067  // and at least
1068  // ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
1069  // Before entry, the incremented array X must contain the
1070  // vector x.
1071  // Here, we must cast away the const-ness of "arg" since BLAS knows
1072  // nothing about const
1073  DenseVector<T> & x_ref = const_cast<DenseVector<T> &> ( arg );
1074  std::vector<T> & x = x_ref.get_values();
1075 
1076  // INCX (input)
1077  // On entry, INCX specifies the increment for the elements of
1078  // X. INCX must not be zero.
1079  PetscBLASInt INCX = 1;
1080 
1081  // BETA (input)
1082  // On entry, BETA specifies the scalar beta. When BETA is
1083  // supplied as zero then Y need not be set on input.
1084  // The second scalar constant passed to this function
1085 
1086  // Y (input) double precision array of DIMENSION at least
1087  // ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
1088  // and at least
1089  // ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
1090  // Before entry with BETA non-zero, the incremented array Y
1091  // must contain the vector y. On exit, Y is overwritten by the
1092  // updated vector y.
1093  // The input vector "dest"
1094  std::vector<T> & y = dest.get_values();
1095 
1096  // INCY (input)
1097  // On entry, INCY specifies the increment for the elements of
1098  // Y. INCY must not be zero.
1099  PetscBLASInt INCY = 1;
1100 
1101  // Finally, ready to call the BLAS function
1102  BLASgemv_(TRANS, &M, &N, &alpha, a.data(), &LDA, x.data(), &INCX, &beta, y.data(), &INCY);
1103 }
1104 
1105 
1106 #else
1107 
1108 
1109 template<typename T>
1111  T,
1112  DenseVector<T> &,
1113  const DenseVector<T> &,
1114  bool) const
1115 {
1116  libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
1117 }
1118 
1119 
1120 #endif
1121 
1122 
1123 //--------------------------------------------------------------
1124 // Explicit instantiations
1125 template void DenseMatrix<Real>::_multiply_blas(const DenseMatrixBase<Real> &, _BLAS_Multiply_Flag);
1127 template void DenseMatrix<Real>::_lu_back_substitute_lapack(const DenseVector<Real> &,
1128  DenseVector<Real> &);
1130  Real,
1131  DenseVector<Real> &,
1132  const DenseVector<Real> &,
1133  bool) const;
1134 template void DenseMatrix<Real>::_svd_lapack(DenseVector<Real> &);
1135 template void DenseMatrix<Real>::_svd_lapack(DenseVector<Real> &,
1136  DenseMatrix<Number> &,
1137  DenseMatrix<Number> &);
1138 template void DenseMatrix<Real>::_svd_helper (char,
1139  char,
1140  std::vector<Real> &,
1141  std::vector<Number> &,
1142  std::vector<Number> &);
1143 template void DenseMatrix<Real>::_svd_solve_lapack (const DenseVector<Real> &, DenseVector<Real> &, Real) const;
1144 template void DenseMatrix<Real>::_evd_lapack(DenseVector<Real> &,
1145  DenseVector<Real> &,
1146  DenseMatrix<Real> *,
1147  DenseMatrix<Real> *);
1148 
1149 #if !(LIBMESH_USE_REAL_NUMBERS)
1150 template void DenseMatrix<Number>::_multiply_blas(const DenseMatrixBase<Number> &, _BLAS_Multiply_Flag);
1152 template void DenseMatrix<Number>::_lu_back_substitute_lapack(const DenseVector<Number> &,
1153  DenseVector<Number> &);
1155  Number,
1156  DenseVector<Number> &,
1157  const DenseVector<Number> &,
1158  bool) const;
1159 template void DenseMatrix<Number>::_svd_lapack(DenseVector<Real> &);
1160 template void DenseMatrix<Number>::_svd_lapack(DenseVector<Real> &,
1161  DenseMatrix<Number> &,
1162  DenseMatrix<Number> &);
1163 template void DenseMatrix<Number>::_svd_helper (char,
1164  char,
1165  std::vector<Real> &,
1166  std::vector<Number> &,
1167  std::vector<Number> &);
1168 template void DenseMatrix<Number>::_svd_solve_lapack (const DenseVector<Number> &, DenseVector<Number> &, Real) const;
1169 template void DenseMatrix<Number>::_evd_lapack(DenseVector<Number> &,
1170  DenseVector<Number> &,
1171  DenseMatrix<Number> *,
1172  DenseMatrix<Number> *);
1173 
1174 #endif
1175 
1176 } // namespace libMesh
virtual unsigned int size() const override
Definition: dense_vector.h:92
void _evd_lapack(DenseVector< T > &lambda_real, DenseVector< T > &lambda_imag, DenseMatrix< T > *VL=nullptr, DenseMatrix< T > *VR=nullptr)
void resize(const unsigned int n)
Definition: dense_vector.h:355
std::vector< T > & get_values()
Definition: dense_vector.h:256
void _lu_back_substitute_lapack(const DenseVector< T > &b, DenseVector< T > &x)
unsigned int m() const
void _svd_lapack(DenseVector< Real > &sigma)
long double max(long double a, double b)
void _svd_solve_lapack(const DenseVector< T > &rhs, DenseVector< T > &x, Real rcond) const
void _multiply_blas(const DenseMatrixBase< T > &other, _BLAS_Multiply_Flag flag)
void _svd_helper(char JOBU, char JOBVT, std::vector< Real > &sigma_val, std::vector< Number > &U_val, std::vector< Number > &VT_val)
std::vector< T > & get_values()
Definition: dense_matrix.h:341
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
static PetscErrorCode Mat * A
void swap(Iterator &lhs, Iterator &rhs)
void _matvec_blas(T alpha, T beta, DenseVector< T > &dest, const DenseVector< T > &arg, bool trans=false) const
void resize(const unsigned int new_m, const unsigned int new_n)
Definition: dense_matrix.h:792
unsigned int n() const
A matrix object used for finite element assembly and numerics.
Definition: dense_matrix.h:54
long double min(long double a, double b)