SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
|
Functions | |
template<Target target = Target::HostTask, typename scalar_t > | |
void | slate::internal::he2hb_gemm (scalar_t alpha, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, Matrix< scalar_t > &&C, int panel_rank, int priority, int64_t queue_index) |
Inner product C = AB to update a single block C, where A and B are single blocks. | |
template<typename scalar_t > | |
void | slate::internal::he2hb_gemm (internal::TargetType< Target::HostTask >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, int panel_rank, int priority, int64_t queue_index) |
Inner product C = AB, Host OpenMP task implementation. | |
template<typename scalar_t > | |
void | slate::internal::he2hb_gemm (internal::TargetType< Target::Devices >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, int panel_rank, int priority, int64_t queue_index) |
Inner product C = AB, Device implementation. | |
template<Target target = Target::HostTask, typename scalar_t > | |
void | slate::internal::he2hb_hemm (HermitianMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, Matrix< scalar_t > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index) |
Apply local reflectors on a Hermitian trailing submatrix. | |
template<typename scalar_t > | |
void | slate::internal::he2hb_hemm (internal::TargetType< Target::HostTask >, HermitianMatrix< scalar_t > &A, Matrix< scalar_t > &B, Matrix< scalar_t > &C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index) |
Apply local reflectors. | |
template<typename scalar_t > | |
void | slate::internal::he2hb_hemm (internal::TargetType< Target::Devices >, HermitianMatrix< scalar_t > &A, Matrix< scalar_t > &B, Matrix< scalar_t > &C, std::vector< int64_t > panel_rank_rows, int priority, int64_t queue_index) |
Apply local reflectors. | |
template<Target target = Target::HostTask, typename scalar_t > | |
void | slate::internal::he2hb_her2k_offdiag_ranks (scalar_t alpha, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, HermitianMatrix< scalar_t > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index) |
Matrix multiply to update the trailing matrix, except the diagonal tiles. | |
template<typename scalar_t > | |
void | slate::internal::he2hb_her2k_offdiag_ranks (internal::TargetType< Target::HostTask >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, HermitianMatrix< scalar_t > &C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index) |
Matrix multiply to update the trailing matrix, except the diagonal tiles. | |
template<typename scalar_t > | |
void | slate::internal::he2hb_her2k_offdiag_ranks (internal::TargetType< Target::Devices >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, HermitianMatrix< scalar_t > &C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index) |
Matrix multiply to update the trailing matrix, except the diagonal tiles. | |
template<Target target = Target::HostTask, typename scalar_t > | |
void | slate::internal::he2hb_trmm (HermitianMatrix< scalar_t > &&AH, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index) |
Triangular matrix multiply. | |
template<typename scalar_t > | |
void | slate::internal::he2hb_trmm (internal::TargetType< Target::HostTask >, HermitianMatrix< scalar_t > &AH, Matrix< scalar_t > &A, Matrix< scalar_t > &B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index) |
Triangular matrix multiply. | |
template<typename scalar_t > | |
void | slate::internal::he2hb_trmm (internal::TargetType< Target::Devices >, HermitianMatrix< scalar_t > &AH, Matrix< scalar_t > &A, Matrix< scalar_t > &B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index) |
Triangular matrix multiply. | |
template<Target target = Target::HostTask, typename scalar_t > | |
void | slate::internal::hettmqr (Op op, Matrix< scalar_t > &&V, Matrix< scalar_t > &&T, HermitianMatrix< scalar_t > &&C, int tag) |
Distributed multiply Hermitian matrix on left and right by Q from QR triangle-triangle factorization of column of tiles. | |
template<typename scalar_t > | |
void | slate::internal::hettmqr (internal::TargetType< Target::HostTask >, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &T, HermitianMatrix< scalar_t > &C, int tag_base) |
Distributed multiply Hermitian matrix on left and right by Q from QR triangle-triangle factorization of column of tiles. | |
template<Target target = Target::HostTask, typename scalar_t > | |
void | slate::internal::unmtr_hb2st (Side side, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &C) |
template<Target target, typename scalar_t > | |
void | slate::internal::unmtr_hb2st (internal::TargetType< target >, Side side, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &C) |
Generic implementation of unmtr_hb2st. | |
void slate::internal::he2hb_gemm | ( | scalar_t | alpha, |
Matrix< scalar_t > && | A, | ||
Matrix< scalar_t > && | B, | ||
scalar_t | beta, | ||
Matrix< scalar_t > && | C, | ||
int | panel_rank, | ||
int | priority, | ||
int64_t | queue_index | ||
) |
Inner product C = AB to update a single block C, where A and B are single blocks.
panel_ranks are the MPI ranks in the A block (A_panel.getRanks( &panel_ranks )); panel_rank is in panel_ranks. Loop over the local tiles of A on this panel_rank to update C = AB. Dispatches to target implementations.
todo: add more details
void slate::internal::he2hb_hemm | ( | HermitianMatrix< scalar_t > && | A, |
Matrix< scalar_t > && | B, | ||
Matrix< scalar_t > && | C, | ||
std::vector< int64_t > & | panel_rank_rows, | ||
int | priority, | ||
int64_t | queue_index | ||
) |
Apply local reflectors on a Hermitian trailing submatrix.
Compute Wi = sum_j Aij Vj, where A is the Hermitian trailing submatrix, B contains the local reflectors Vj from the local QR factorization of a panel of A, and C is the output matrix containing Wi = sum_j Aij Vj. panel_rank_rows contains the local row indices for panel_rank, where panel_rank is B.tileRank( i, 0 ), i = 0:nt-1. Dispatches to target implementations.
void slate::internal::he2hb_hemm | ( | internal::TargetType< Target::Devices > | , |
HermitianMatrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
Matrix< scalar_t > & | C, | ||
std::vector< int64_t > | panel_rank_rows, | ||
int | priority, | ||
int64_t | queue_index | ||
) |
Apply local reflectors.
GPU device BLAS implementation.
void slate::internal::he2hb_hemm | ( | internal::TargetType< Target::HostTask > | , |
HermitianMatrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
Matrix< scalar_t > & | C, | ||
std::vector< int64_t > & | panel_rank_rows, | ||
int | priority, | ||
int64_t | queue_index | ||
) |
Apply local reflectors.
Host OpenMP task implementation.
void slate::internal::he2hb_her2k_offdiag_ranks | ( | internal::TargetType< Target::Devices > | , |
scalar_t | alpha, | ||
Matrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
scalar_t | beta, | ||
HermitianMatrix< scalar_t > & | C, | ||
std::vector< int64_t > & | panel_rank_rows, | ||
int | priority, | ||
int64_t | queue_index | ||
) |
Matrix multiply to update the trailing matrix, except the diagonal tiles.
Here A is a single block column and B is a single block row. Device implementation.
void slate::internal::he2hb_her2k_offdiag_ranks | ( | internal::TargetType< Target::HostTask > | , |
scalar_t | alpha, | ||
Matrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
scalar_t | beta, | ||
HermitianMatrix< scalar_t > & | C, | ||
std::vector< int64_t > & | panel_rank_rows, | ||
int | priority, | ||
int64_t | queue_index | ||
) |
Matrix multiply to update the trailing matrix, except the diagonal tiles.
Host OpenMP task implementation.
void slate::internal::he2hb_her2k_offdiag_ranks | ( | scalar_t | alpha, |
Matrix< scalar_t > && | A, | ||
Matrix< scalar_t > && | B, | ||
scalar_t | beta, | ||
HermitianMatrix< scalar_t > && | C, | ||
std::vector< int64_t > & | panel_rank_rows, | ||
int | priority, | ||
int64_t | queue_index | ||
) |
Matrix multiply to update the trailing matrix, except the diagonal tiles.
Here A is a single block column (containing V after QR factorization of panel k), B is a single block row (containing W = VT), and C is a trailing square submatrix updated as Cij -= Aik Bjk^H, i = k+1:nt-1, j = k+1:nt-1. Dispatches to target implementations.
void slate::internal::he2hb_trmm | ( | HermitianMatrix< scalar_t > && | AH, |
Matrix< scalar_t > && | A, | ||
Matrix< scalar_t > && | B, | ||
std::vector< int64_t > & | panel_rank_rows, | ||
int | priority, | ||
int64_t | queue_index | ||
) |
Triangular matrix multiply.
Compute B = B A. AH is a Hermitian matrix; it is needed here only to check whether the rank is an upper or lower rank that contributes to computing Bi, i = 0:mt-1. B is a block column. A contains the upper triangular or trapezoid matrix T. panel_rank_rows contains the local indices for panel_rank. If A contains the upper triangular T, then call trmm B = B T. If A contains the trapezoid T, then the slice T = A[ 0:A.mb(), 0:A.mb() ] is upper triangular; take Bi = Bi[ 0:B.mb(), 0:A.mb() ] and call trmm Bi = Bi T. Dispatches to target implementations.
panel_rank_rows contains the local row-indices of B
void slate::internal::he2hb_trmm | ( | internal::TargetType< Target::Devices > | , |
HermitianMatrix< scalar_t > & | AH, | ||
Matrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
std::vector< int64_t > & | panel_rank_rows, | ||
int | priority, | ||
int64_t | queue_index | ||
) |
Triangular matrix multiply.
Device implementation.
void slate::internal::he2hb_trmm | ( | internal::TargetType< Target::HostTask > | , |
HermitianMatrix< scalar_t > & | AH, | ||
Matrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
std::vector< int64_t > & | panel_rank_rows, | ||
int | priority, | ||
int64_t | queue_index | ||
) |
Triangular matrix multiply.
Host OpenMP task implementation.
void slate::internal::hettmqr | ( | internal::TargetType< Target::HostTask > | , |
Op | op, | ||
Matrix< scalar_t > & | V, | ||
Matrix< scalar_t > & | T, | ||
HermitianMatrix< scalar_t > & | C, | ||
int | tag_base | ||
) |
Distributed multiply Hermitian matrix on left and right by Q from QR triangle-triangle factorization of column of tiles.
Host implementation.
[in] | tag_base | This process uses MPI tags from the range [tag_base, tag_base+mt*nt) |
void slate::internal::hettmqr | ( | Op | op, |
Matrix< scalar_t > && | V, | ||
Matrix< scalar_t > && | T, | ||
HermitianMatrix< scalar_t > && | C, | ||
int | tag | ||
) |
Distributed multiply Hermitian matrix on left and right by Q from QR triangle-triangle factorization of column of tiles.
Dispatches to target implementations. todo: This assumes A and T have already been communicated as needed. However, it necessarily handles communication for C. The tag is used in geqrf to differentiate communication for the look-ahead panel from the rest of the trailing matrix.
void slate::internal::unmtr_hb2st | ( | internal::TargetType< target > | , |
Side | side, | ||
Op | op, | ||
Matrix< scalar_t > & | V, | ||
Matrix< scalar_t > & | C | ||
) |
Generic implementation of unmtr_hb2st.
SLATE Working Note 13: Implementing Singular Value and Symmetric/Hermitian Eigenvalue Solvers, SLATE Working Notes, no. 13. https://www.icl.utk.edu/publications/swan-013