SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
|
Functions | |
template<typename scalar_t > | |
void | slate::hb2st (HermitianBandMatrix< scalar_t > &A, Matrix< scalar_t > &V, Options const &opts) |
Reduces a band Hermitian matrix to a bidiagonal matrix using bulge chasing. | |
template<typename scalar_t > | |
void | slate::he2hb (HermitianMatrix< scalar_t > &A, TriangularFactors< scalar_t > &T, Options const &opts) |
Distributed parallel reduction to band for 3-stage SVD. | |
template<typename scalar_t > | |
void | slate::internal::herf (int64_t n, scalar_t *v, HermitianMatrix< scalar_t > &A) |
Applies a Householder reflector \(H = I - \tau v v^H\) to the Hermitian matrix \(A\) on the left and right. | |
template<Target target, typename scalar_t > | |
void | slate::internal::hebr1 (int64_t n, scalar_t *v, HermitianMatrix< scalar_t > &&A, int priority) |
Implements task type 1 in the tridiagonal bulge chasing algorithm. | |
template<Target target, typename scalar_t > | |
void | slate::internal::hebr2 (int64_t n1, scalar_t *v1, int64_t n2, scalar_t *v2, Matrix< scalar_t > &&A, int priority) |
Implements task type 2 in the tridiagonal bulge chasing algorithm. | |
template<Target target, typename scalar_t > | |
void | slate::internal::hebr3 (int64_t n, scalar_t *v, HermitianMatrix< scalar_t > &&A, int priority) |
Implements task type 3 in the tridiagonal bulge chasing algorithm. | |
template<typename scalar_t > | |
void | slate::internal::hebr3 (internal::TargetType< Target::HostTask >, int64_t n, scalar_t *v, HermitianMatrix< scalar_t > &A, int priority) |
Implements task type 3 in the tridiagonal bulge chasing algorithm, updating a diagonal block with a 2-sided Householder transformation. | |
template<typename real_t > | |
void | slate::stedc (std::vector< real_t > &D, std::vector< real_t > &E, Matrix< real_t > &Q, Options const &opts) |
Computes all eigenvalues and eigenvectors of a real symmetric tridiagonal matrix in parallel, using the divide and conquer algorithm. | |
template<typename real_t > | |
void | slate::stedc_deflate (int64_t n, int64_t n1, real_t &rho, real_t *D, real_t *Dsecular, real_t *z, real_t *zsecular, Matrix< real_t > &Q, Matrix< real_t > &Qtype, int64_t *itype, int64_t &nsecular, int64_t &Qtype12_begin, int64_t &Qtype12_end, int64_t &Qtype23_begin, int64_t &Qtype23_end, Options const &opts) |
Sorts the two sets of eigenvalues together into a single sorted set, then deflates eigenvalues, which both resolves stability issues (division by zero) and reduces the size of the problem. | |
template<typename real_t > | |
void | slate::stedc_merge (int64_t n, int64_t n1, real_t rho, real_t *D, Matrix< real_t > &Q, Matrix< real_t > &Qtype, Matrix< real_t > &U, Options const &opts) |
Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix, in parallel. | |
template<typename real_t > | |
void | slate::stedc_secular (int64_t nsecular, int64_t n, real_t rho, real_t *D, real_t *z, real_t *Lambda, Matrix< real_t > &U, int64_t *itype, Options const &opts) |
Finds the nsecular roots of the secular equation, as defined by the values in rho, D, z, and computes the corresponding eigenvectors in U. | |
template<typename real_t > | |
void | slate::stedc_solve (std::vector< real_t > &D, std::vector< real_t > &E, Matrix< real_t > &Q, Matrix< real_t > &W, Matrix< real_t > &U, Options const &opts) |
Computes all eigenvalues and eigenvectors of a symmetric tridiagonal matrix in parallel, using the divide and conquer algorithm. | |
template<typename real_t > | |
void | slate::stedc_z_vector (Matrix< real_t > &Q, std::vector< real_t > &z, Options const &opts) |
Communicates the z vector to all ranks. | |
template<typename scalar_t > | |
void | slate::steqr2 (Job jobz, std::vector< blas::real_type< scalar_t > > &D, std::vector< blas::real_type< scalar_t > > &E, Matrix< scalar_t > &Z, Options const &opts) |
Computes all eigenvalues/eigenvectors of a symmetric tridiagonal matrix using the Pal-Walker-Kahan variant of the QL or QR algorithm. | |
template<typename scalar_t > | |
void | slate::sterf (std::vector< scalar_t > &D, std::vector< scalar_t > &E, Options const &opts) |
Computes all eigenvalues of a symmetric tridiagonal matrix using the Pal-Walker-Kahan variant of the QL or QR algorithm. | |
template<typename scalar_t > | |
void | slate::unmtr_hb2st (Side side, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &C, const std::map< Option, Value > &opts) |
Multiplies the general m-by-n matrix C by Q from slate::hb2st as follows: | |
template<typename scalar_t > | |
void | slate::unmtr_he2hb (Side side, Op op, HermitianMatrix< scalar_t > &A, TriangularFactors< scalar_t > T, Matrix< scalar_t > &C, Options const &opts) |
Multiplies the general m-by-n matrix C by Q from slate::he2hb as follows: | |
void slate::hb2st | ( | HermitianBandMatrix< scalar_t > & | A, |
Matrix< scalar_t > & | V, | ||
Options const & | opts | ||
) |
Reduces a band Hermitian matrix to a bidiagonal matrix using bulge chasing.
scalar_t | One of float, double, std::complex<float>, std::complex<double>. |
[in,out] | A | The band Hermitian matrix A. |
[out] | V | Matrix of Householder reflectors produced in the process. Dimension 2*band-by-XYZ todo |
[in] | opts | Additional options, as map of name = value pairs. Possible options:
|
void slate::he2hb | ( | HermitianMatrix< scalar_t > & | A, |
TriangularFactors< scalar_t > & | T, | ||
Options const & | opts | ||
) |
Distributed parallel reduction to band for 3-stage SVD.
Reduces an n-by-n Hermitian matrix \(A\) to band form using unitary transformations. The factorization has the form
\[ A = Q B Q^H \]
where \(Q\) is unitary and \(B\) is Hermitian band with nb sub and superdiagonals.
scalar_t | One of float, double, std::complex<float>, std::complex<double>. |
[in,out] | A | On entry, the n-by-n Hermitian matrix \(A\). On exit:
|
[out] | T | On exit, triangular matrices of the block reflectors for Q. |
[in] | opts | Additional options, as map of name = value pairs. Possible options:
|
void slate::internal::hebr1 | ( | int64_t | n, |
scalar_t * | v, | ||
HermitianMatrix< scalar_t > && | A, | ||
int | priority | ||
) |
Implements task type 1 in the tridiagonal bulge chasing algorithm.
Dispatches to target implementations.
void slate::internal::hebr2 | ( | int64_t | n1, |
scalar_t * | v1, | ||
int64_t | n2, | ||
scalar_t * | v2, | ||
Matrix< scalar_t > && | A, | ||
int | priority | ||
) |
Implements task type 2 in the tridiagonal bulge chasing algorithm.
Dispatches to target implementations.
void slate::internal::hebr3 | ( | int64_t | n, |
scalar_t * | v, | ||
HermitianMatrix< scalar_t > && | A, | ||
int | priority | ||
) |
Implements task type 3 in the tridiagonal bulge chasing algorithm.
Dispatches to target implementations.
void slate::internal::hebr3 | ( | internal::TargetType< Target::HostTask > | , |
int64_t | n, | ||
scalar_t * | v, | ||
HermitianMatrix< scalar_t > & | A, | ||
int | priority | ||
) |
Implements task type 3 in the tridiagonal bulge chasing algorithm, updating a diagonal block with a 2-sided Householder transformation.
[in] | n | Length of vector v. |
[in] | v | The Householder reflector produced by task type 2. |
[in,out] | A | A diagonal block in a sweep. |
void slate::internal::herf | ( | int64_t | n, |
scalar_t * | v, | ||
HermitianMatrix< scalar_t > & | A | ||
) |
Applies a Householder reflector \(H = I - \tau v v^H\) to the Hermitian matrix \(A\) on the left and right.
Takes the \(\tau\) factor from \(v[0]\).
[in] | n | Length of vector v. |
[in] | v | The vector v in the representation of H. Modified but restored on exit. |
[in,out] | A | The n-by-n Hermitian matrix A. |
void slate::stedc | ( | std::vector< real_t > & | D, |
std::vector< real_t > & | E, | ||
Matrix< real_t > & | Q, | ||
Options const & | opts | ||
) |
Computes all eigenvalues and eigenvectors of a real symmetric tridiagonal matrix in parallel, using the divide and conquer algorithm.
real_t | One of float, double. |
[in,out] | D | On entry, the diagonal elements of the tridiagonal matrix. On exit, the eigenvalues in ascending order. D is duplicated on all MPI ranks. |
[in,out] | E | On entry, the subdiagonal elements of the tridiagonal matrix. On exit, E has been destroyed. E is duplicated on all MPI ranks. |
[out] | Q | On exit, Q contains the orthonormal eigenvectors of the symmetric tridiagonal matrix. Code currently requires that Q has a 2D block-cyclic distribution, with column-major grid order, which is the default for a SLATE Matrix. |
[in] | opts | Additional options, as map of name = value pairs. Possible options:
|
void slate::stedc_deflate | ( | int64_t | n, |
int64_t | n1, | ||
real_t & | rho, | ||
real_t * | D, | ||
real_t * | Dsecular, | ||
real_t * | z, | ||
real_t * | zsecular, | ||
Matrix< real_t > & | Q, | ||
Matrix< real_t > & | Qtype, | ||
int64_t * | itype, | ||
int64_t & | nsecular, | ||
int64_t & | Qtype12_begin, | ||
int64_t & | Qtype12_end, | ||
int64_t & | Qtype23_begin, | ||
int64_t & | Qtype23_end, | ||
Options const & | opts | ||
) |
Sorts the two sets of eigenvalues together into a single sorted set, then deflates eigenvalues, which both resolves stability issues (division by zero) and reduces the size of the problem.
There are two ways in which deflation can occur: 1) There is a zero or tiny entry in the z vector, indicating an eigenvalue of the subproblems is already converged to an eigenvalue of the merged problem, so it can be deflated. 2) Two eigenvalues are (nearly) identical. In this case, a Givens rotation is applied to zero an entry of z, and the corresponding eigenvalue can be deflated. For each such occurrence the order of the related secular equation problem is reduced by one.
Corresponds to ScaLAPACK pdlaed2. (Was: indicates ScaLAPACK names.)
real_t | One of float, double. |
[in] | n | Size of merged system. n = n1 + n2. |
[in] | n1 | Size of first subproblem, D1. Note that n2, the size of second subproblem, D2, is not passed and is computed as n2 = n - n1. |
[in,out] | rho | On entry, the off-diagonal element associated with the rank-1 cut that originally split the two submatrices to be merged. On exit, rho has been scaled to be positive and make \(z\) unit norm, as required by stedc_secular. |
[in,out] | D | Real vector of dimension n. On entry, D1 = D[ 0 : n1-1 ] contains eigenvalues of the first subproblem, D2 = D[ n1 : n-1 ] contains eigenvalues of the second subproblem. On exit, deflated eigenvalues are in decreasing order at the local end within a process column (pcol) per the 2D block cyclic distribution of Q. Example on 2 ranks with nb = 4. To easily track values, let D1 be multiples of 3, and D2 be even numbers. Deflate arbitrary values 6, 9, 21, 4, 12 (see ^ marks). Value 6 is type 2 deflation since it is repeated; the rest are type 1 deflation. Note 9, 6, 4 stay on pcol 0, and 21, 12 stay on pcol 1. pcol = [ 0 0 0 0 | 1 1 1 1 | 0 0 0 0 | 1 1 1 1 ] D_in = [ 3 6 9 12 | 15 18 21 24 | 2 4 6 8 | 10 12 14 16 ] [ ^ ^ | ^ | ^ | ^ ] D_out = [ # # # # | # # # # | # 9 6 4 | # # 21 12 ] [ | | ^ ^ ^ | ^ ^ ] Dsecular = [ 2 3 6 8 | 10 12 14 15 | 16 18 24 # | # # # # ] where | separate blocks, and # values are not set. |
[out] | Dsecular | Real vector of dimension n. On exit, Dsecular[ 0 : nsecular-1 ] contains non-deflated eigenvalues, sorted in increasing order. (Was: dlamda) |
[in,out] | z | Real vector of dimension n. On entry, z is the updating vector, z = Q^T v = [ Q1^T 0 ] [ e_n1 ], [ 0 Q2^T ] [ e_1 ] which is the last row of Q1 and the first row of Q2. On exit, the contents of z are destroyed. |
[out] | zsecular | Real vector of dimension n. On exit, zsecular[ 0 : nsecular-1 ] has non-deflated entries of z, as updated by normalizing and applying Givens rotations in deflation, for use by stedc_secular. (Was: w) |
[in,out] | Q | Real n-by-n matrix. On entry, the eigenvectors of the two subproblems, Q = [ Q1 0 ]. [ 0 Q2 ] On exit, eigenvectors associated with type 2 deflation have been modified. |
[in,out] | Qtype | Real n-by-n matrix. A copy of all the eigenvectors locally ordered by column type such that Qtype(:, itype) = Q(:, ideflate). Non-deflated eigenvectors will be used by stedc_merge in a matrix multiply (gemm) to solve for the new eigenvectors; deflated eigenvectors will be copied back to Q in the correct place. |
[out] | ct_count | Integer npcol-by-5 array. On exit, ct_count( pcol, ctype ) is the number of columns of column type ctype on process column pcol. ctype = 1:4, column ctype = 0 is unused. |
[out] | itype | Integer vector of dimension n. On exit, permutation to arrange columns of Qtype locally into 4 column types based on block structure: 1: non-zero in upper half only (rows 0 : n1-1). 2: non-zero in all rows; non-deflated eigenvector from type 2 deflation when one vector is from Q1 and the other vector is from Q2. 3: non-zero in lower half only (rows n1 : n-1). 4: may be non-zero in all rows; deflated eigenvectors. (Was: indx) |
[out] | nsecular | On exit, number of non-deflated eigenvalues. (Was: k) |
[out] | Qtype12_begin | On exit, index of first column in Qtype sub-matrix spanning column types 1 and 2. Because of local permutation, this may include columns of types 3 and 4. (Was: ib1) |
[out] | Qtype12_end | On exit, index of last column + 1 in Qtype sub-matrix spanning column types 1 and 2. (Was: ib1 + nn1) |
[out] | Qtype23_begin | On exit, index of first column in Qtype sub-matrix spanning column types 2 and 3. Because of local permutation, this may include columns of types 1 and 4. (Was: ib2) |
[out] | Qtype23_end | On exit, index of last column + 1 in Qtype sub-matrix spanning column types 2 and 3. (Was: ib2 + nn2) |
[in] | opts | Additional options, as map of name = value pairs. Possible options:
|
void slate::stedc_merge | ( | int64_t | n, |
int64_t | n1, | ||
real_t | rho, | ||
real_t * | D, | ||
Matrix< real_t > & | Q, | ||
Matrix< real_t > & | Qtype, | ||
Matrix< real_t > & | U, | ||
Options const & | opts | ||
) |
Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix, in parallel.
\[ T = Q_{in} ( D_{in} + \rho Z Z^H ) Q_{in}^H = Q_{out} D_{out} Q_{out}^H \]
where \(z = Q^H v\) and \(v = [ e_{n1} e_1 ]\) is a vector of length \(n\) with ones in the \(n1\) and \(n1 + 1\) elements and zeros elsewhere.
The eigenvectors of the original matrix are stored in Q, and the eigenvalues are in D. The algorithm consists of three stages:
The first stage consists of deflating the size of the problem when there are multiple eigenvalues or if there is a zero in the \(z\) vector. For each such occurence, the dimension of the secular equation problem is reduced by one. This stage is performed by the routine stedc_deflate.
The second stage consists of calculating the updated eigenvalues. This is done by finding the roots of the secular equation via the LAPACK routine laed4, called by stedc_secular. This routine also calculates the eigenvectors of the current problem.
The final stage consists of computing the updated eigenvectors directly using the updated eigenvalues. The eigenvectors for the current problem are multiplied with the eigenvectors from the overall problem.
Corresponds to ScaLAPACK pdlaed1.
real_t | One of float, double. |
[in] | n | Size of merged system. n = n1 + n2. |
[in] | n1 | Size of first subproblem, D1. Size of second subproblem, D2, is n2 = n - n1. |
[in,out] | rho | On entry, the off-diagonal element associated with the rank-1 cut that originally split the two submatrices to be merged. On exit, updated by deflation process. |
[in,out] | D | On entry, the eigenvalues of two subproblems. On exit, the eigenvalues of the merged problem, not sorted. |
[in,out] | Q | On entry, Q contains eigenvectors of subproblems. On exit, Q contains the orthonormal eigenvectors of the merged problem. |
[out] | Qtype | Qtype is a workspace, the same size as Q. |
[out] | U | U is a workspace, the same size as Q. |
[in] | opts | Additional options, as map of name = value pairs. Possible options:
|
void slate::stedc_secular | ( | int64_t | nsecular, |
int64_t | n, | ||
real_t | rho, | ||
real_t * | D, | ||
real_t * | z, | ||
real_t * | Lambda, | ||
Matrix< real_t > & | U, | ||
int64_t * | itype, | ||
Options const & | opts | ||
) |
Finds the nsecular roots of the secular equation, as defined by the values in rho, D, z, and computes the corresponding eigenvectors in U.
Corresponds to ScaLAPACK pdlaed3.
real_t | One of float, double. |
[in] | nsecular | The number of non-deflated eigenvalues, and the order of the related secular equation. 0 <= nsecular <= n. |
[in] | n | The number of rows and columns in the U matrix. n >= nsecular (deflation may result in n > nsecular). |
[in] | rho | The off-diagonal element associated with the rank-1 cut that originally split the two submatrices to be merged, as modified by stedc_deflate to be positive and make \(z\) unit norm. |
[in] | D | The nsecular non-deflated eigenvalues of the two sub-problems, as output by stedc_deflate in Dsecular, which will be used by laed4 to form the secular equation. |
[in] | z | The nsecular entries of the deflation-adjusted z vector, as output by stedc_deflate in zsecular. |
[in,out] | Lambda | On entry, Lambda contains the (n - nsecular) deflated eigenvalues, as output by stedc_deflate in D. On exit, Lambda contains all n eigenvalues, permuted by itype. |
[out] | U | On exit, U contains the orthonormal eigenvectors from the secular equation, permuted by itype. |
[in] | itype | Permutation that arranged Qtype locally into 4 groups by column type, Qtype = [ Q11 Q12 Q14 ], [ Q22 Q23 Q24 ] as output by stedc_deflate. Used to permute Lambda and U to match Qtype. |
[in] | opts | Additional options, as map of name = value pairs. Possible options:
|
void slate::stedc_solve | ( | std::vector< real_t > & | D, |
std::vector< real_t > & | E, | ||
Matrix< real_t > & | Q, | ||
Matrix< real_t > & | W, | ||
Matrix< real_t > & | U, | ||
Options const & | opts | ||
) |
Computes all eigenvalues and eigenvectors of a symmetric tridiagonal matrix in parallel, using the divide and conquer algorithm.
Corresponds to ScaLAPACK pdlaed0.
real_t | One of float, double. |
[in,out] | D | On entry, the diagonal elements of the tridiagonal matrix. On exit, the eigenvalues, not sorted. |
[in,out] | E | On entry, the subdiagonal elements of the tridiagonal matrix. On exit, E has been destroyed. |
[in,out] | Q | On entry, Q is the Identity. On exit, Q contains the orthonormal eigenvectors of the symmetric tridiagonal matrix. |
[out] | W | W is a workspace, the same size as Q. |
[out] | U | U is a workspace, the same size as Q. |
[in] | opts | Additional options, as map of name = value pairs. Possible options:
|
void slate::stedc_z_vector | ( | Matrix< real_t > & | Q, |
std::vector< real_t > & | z, | ||
Options const & | opts | ||
) |
Communicates the z vector to all ranks.
Corresponds to ScaLAPACK pdlaedz.
real_t | One of float, double. |
[in] | Q | On entry, matrix of eigenvectors for 2 sub-problems being merged, Q = [ Q1 0 ]. [ 0 Q2 ] If Q is nt-by-nt tiles, then: Q1 is nt1-by-nt1 tiles with nt1 = floor( nt / 2 ), Q2 is nt2-by-nt2 tiles with nt2 = ceil( nt / 2 ). |
[out] | z | On exit, z vector in divide-and-conquer algorithm, consisting of the last row of Q1 and the first row of Q2: z = Q^T [ e_{n_1} ] = [ Q1^T e_{n_1} ]. [ e_1 ] [ Q2^T e_1 ] z is duplicated on all MPI ranks. |
[in] | opts | Additional options, as map of name = value pairs. Possible options:
|
void slate::steqr2 | ( | Job | jobz, |
std::vector< blas::real_type< scalar_t > > & | D, | ||
std::vector< blas::real_type< scalar_t > > & | E, | ||
Matrix< scalar_t > & | Z, | ||
Options const & | opts | ||
) |
Computes all eigenvalues/eigenvectors of a symmetric tridiagonal matrix using the Pal-Walker-Kahan variant of the QL or QR algorithm.
ATTENTION: only host computation supported for now
void slate::sterf | ( | std::vector< scalar_t > & | D, |
std::vector< scalar_t > & | E, | ||
Options const & | opts | ||
) |
Computes all eigenvalues of a symmetric tridiagonal matrix using the Pal-Walker-Kahan variant of the QL or QR algorithm.
ATTENTION: only host computation supported for now
void slate::unmtr_hb2st | ( | Side | side, |
Op | op, | ||
Matrix< scalar_t > & | V, | ||
Matrix< scalar_t > & | C, | ||
const std::map< Option, Value > & | opts | ||
) |
Multiplies the general m-by-n matrix C by Q from slate::hb2st
as follows:
op | side = Left | side = Right (not supported) |
---|---|---|
op = NoTrans | \(Q C \) | \(C Q \) |
op = ConjTrans | \(Q^H C\) | \(C Q^H\) |
where \(Q\) is a unitary matrix defined as the product of k elementary reflectors
\[ Q = H(1) H(2) . . . H(k) \]
scalar_t | One of float, double, std::complex<float>, std::complex<double>. |
[in] | side |
|
[in] | op |
|
[in] | V | Householder vectors as returned by slate::hb2st . |
[in,out] | C | On entry, the m-by-n matrix \(C\). On exit, \(C\) is overwritten by \(Q C\), \(Q^H C\), \(C Q\), or \(C Q^H\). C must be distributed 1D block column (cyclic or non-cyclic). |
[in] | opts | Additional options, as map of name = value pairs. Possible options:
|
void slate::unmtr_he2hb | ( | Side | side, |
Op | op, | ||
HermitianMatrix< scalar_t > & | A, | ||
TriangularFactors< scalar_t > | T, | ||
Matrix< scalar_t > & | C, | ||
Options const & | opts | ||
) |
Multiplies the general m-by-n matrix C by Q from slate::he2hb
as follows:
op | side = Left | side = Right |
---|---|---|
op = NoTrans | \(Q C \) | \(C Q \) |
op = ConjTrans | \(Q^H C\) | \(C Q^H\) |
where \(Q\) is a unitary matrix defined as the product of k elementary reflectors
\[ Q = H(1) H(2) . . . H(k) \]
scalar_t | One of float, double, std::complex<float>, std::complex<double>. |
[in] | side |
|
[in] | op |
|
[in] | A | On entry, the n-by-n Hermitian matrix \(A\), as returned by slate::he2hb . |
[in] | T | On entry, triangular matrices of the elementary reflector H(i), as returned by slate::he2hb . |
[in,out] | C | On entry, the m-by-n matrix \(C\). On exit, \(C\) is overwritten by \(Q C\), \(Q^H C\), \(C Q\), or \(C Q^H\). |
[in] | opts | Additional options, as map of name = value pairs. Possible options:
|