SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
Loading...
Searching...
No Matches

Functions

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::he2hb_gemm (scalar_t alpha, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, Matrix< scalar_t > &&C, int panel_rank, int priority, int64_t queue_index)
 Inner product C = AB to update a single block C, where A and B are single blocks.
 
template<typename scalar_t >
void slate::internal::he2hb_gemm (internal::TargetType< Target::HostTask >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, int panel_rank, int priority, int64_t queue_index)
 Inner product C = AB, Host OpenMP task implementation.
 
template<typename scalar_t >
void slate::internal::he2hb_gemm (internal::TargetType< Target::Devices >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, int panel_rank, int priority, int64_t queue_index)
 Inner product C = AB, Device implementation.
 
template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::he2hb_hemm (HermitianMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, Matrix< scalar_t > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 Apply local reflectors on a Hermitian trailing submatrix.
 
template<typename scalar_t >
void slate::internal::he2hb_hemm (internal::TargetType< Target::HostTask >, HermitianMatrix< scalar_t > &A, Matrix< scalar_t > &B, Matrix< scalar_t > &C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 Apply local reflectors.
 
template<typename scalar_t >
void slate::internal::he2hb_hemm (internal::TargetType< Target::Devices >, HermitianMatrix< scalar_t > &A, Matrix< scalar_t > &B, Matrix< scalar_t > &C, std::vector< int64_t > panel_rank_rows, int priority, int64_t queue_index)
 Apply local reflectors.
 
template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::he2hb_her2k_offdiag_ranks (scalar_t alpha, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, HermitianMatrix< scalar_t > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 matrix multiply to update trailing matrix, except the diagonal tiles.
 
template<typename scalar_t >
void slate::internal::he2hb_her2k_offdiag_ranks (internal::TargetType< Target::HostTask >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, HermitianMatrix< scalar_t > &C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 matrix multiply to update trailing matrix, except the diagonal tiles.
 
template<typename scalar_t >
void slate::internal::he2hb_her2k_offdiag_ranks (internal::TargetType< Target::Devices >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, HermitianMatrix< scalar_t > &C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 matrix multiply to update trailing matrix, except the diagonal tiles.
 
template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::he2hb_trmm (HermitianMatrix< scalar_t > &&AH, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<typename scalar_t >
void slate::internal::he2hb_trmm (internal::TargetType< Target::HostTask >, HermitianMatrix< scalar_t > &AH, Matrix< scalar_t > &A, Matrix< scalar_t > &B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<typename scalar_t >
void slate::internal::he2hb_trmm (internal::TargetType< Target::Devices >, HermitianMatrix< scalar_t > &AH, Matrix< scalar_t > &A, Matrix< scalar_t > &B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::hettmqr (Op op, Matrix< scalar_t > &&V, Matrix< scalar_t > &&T, HermitianMatrix< scalar_t > &&C, int tag)
 Distributed multiply Hermitian matrix on left and right by Q from QR triangle-triangle factorization of column of tiles.
 
template<typename scalar_t >
void slate::internal::hettmqr (internal::TargetType< Target::HostTask >, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &T, HermitianMatrix< scalar_t > &C, int tag_base)
 Distributed multiply Hermitian matrix on left and right by Q from QR triangle-triangle factorization of column of tiles.
 
template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::unmtr_hb2st (Side side, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &C)
 
template<Target target, typename scalar_t >
void slate::internal::unmtr_hb2st (internal::TargetType< target >, Side side, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &C)
 Generic implementation of unmtr_hb2st.
 

Detailed Description

Function Documentation

◆ he2hb_gemm()

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::he2hb_gemm ( scalar_t  alpha,
Matrix< scalar_t > &&  A,
Matrix< scalar_t > &&  B,
scalar_t  beta,
Matrix< scalar_t > &&  C,
int  panel_rank,
int  priority,
int64_t  queue_index 
)

Inner product C = AB to update a single block C, where A and B are single blocks.

panel_ranks are the mpi ranks in A block (A_panel.getRanks( &panel_ranks )), panel_rank is in panel_ranks. Loop over the local tiles of A on this panel_rank to update C = AB. Dispatches to target implementations.

todo: add more details

◆ he2hb_hemm() [1/3]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::he2hb_hemm ( HermitianMatrix< scalar_t > &&  A,
Matrix< scalar_t > &&  B,
Matrix< scalar_t > &&  C,
std::vector< int64_t > &  panel_rank_rows,
int  priority,
int64_t  queue_index 
)

Apply local reflectors on a Hermitian trailing submatrix.

Compute Wi = sum_j Aij Vj. Where, A is the Hermitian trailing submatrix. B contains the local reflectors Vj from local QR factorization of a panel of A C is the output matrix contains the Wi = sum_j Aij Vj panel_rank_rows contains the local row indices for panel_rank, where the panel_rank is B.tileRank( i, 0 ), i = 0:nt-1. Dispatches to target implementations.

◆ he2hb_hemm() [2/3]

template<typename scalar_t >
void slate::internal::he2hb_hemm ( internal::TargetType< Target::Devices ,
HermitianMatrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
Matrix< scalar_t > &  C,
std::vector< int64_t >  panel_rank_rows,
int  priority,
int64_t  queue_index 
)

Apply local reflectors.

GPU device BLAS implementation.

◆ he2hb_hemm() [3/3]

template<typename scalar_t >
void slate::internal::he2hb_hemm ( internal::TargetType< Target::HostTask ,
HermitianMatrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
Matrix< scalar_t > &  C,
std::vector< int64_t > &  panel_rank_rows,
int  priority,
int64_t  queue_index 
)

Apply local reflectors.

Host OpenMP task implementation.

◆ he2hb_her2k_offdiag_ranks() [1/3]

template<typename scalar_t >
void slate::internal::he2hb_her2k_offdiag_ranks ( internal::TargetType< Target::Devices ,
scalar_t  alpha,
Matrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
scalar_t  beta,
HermitianMatrix< scalar_t > &  C,
std::vector< int64_t > &  panel_rank_rows,
int  priority,
int64_t  queue_index 
)

matrix multiply to update trailing matrix, except the diagonal tiles.

where A is a single block column and B is a single block row. Device implementation.

◆ he2hb_her2k_offdiag_ranks() [2/3]

template<typename scalar_t >
void slate::internal::he2hb_her2k_offdiag_ranks ( internal::TargetType< Target::HostTask ,
scalar_t  alpha,
Matrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
scalar_t  beta,
HermitianMatrix< scalar_t > &  C,
std::vector< int64_t > &  panel_rank_rows,
int  priority,
int64_t  queue_index 
)

matrix multiply to update trailing matrix, except the diagonal tiles.

Host OpenMP task implementation.

◆ he2hb_her2k_offdiag_ranks() [3/3]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::he2hb_her2k_offdiag_ranks ( scalar_t  alpha,
Matrix< scalar_t > &&  A,
Matrix< scalar_t > &&  B,
scalar_t  beta,
HermitianMatrix< scalar_t > &&  C,
std::vector< int64_t > &  panel_rank_rows,
int  priority,
int64_t  queue_index 
)

matrix multiply to update trailing matrix, except the diagonal tiles.

where A is a single block column (contains V after doing QR factorization of a panel k). and B is a single block row (contain W = VT) C is a trailing square submatrix Cij -= AikBjk^H, i = k+1:nt-1, j = k+1:nt-1. Dispatches to target implementations.

◆ he2hb_trmm() [1/3]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::he2hb_trmm ( HermitianMatrix< scalar_t > &&  AH,
Matrix< scalar_t > &&  A,
Matrix< scalar_t > &&  B,
std::vector< int64_t > &  panel_rank_rows,
int  priority,
int64_t  queue_index 
)

Triangular matrix multiply.

Compute B = B A AH is a Hermitian matrix. It's needed here just to check if the rank is an upper or lower rank that contribute to compute Bi, i = 0:mt-1. B is a block column. A contains upper triangular or trapezoid T. indices contains the local indices for panel_rank, If A contains upper triangular T, then call trmm B = B T If A contains trapezoid T, then the slice T = A[ 0:A.mb(), 0:A.mb() ] is upper triangular, Bi = Bi[ 0:B.mb(), 0:A.mb() ]. Call trmm Bi = Bi T. Dispatches to target implementations.

panel_rank_rows contains the local row-indices of B

◆ he2hb_trmm() [2/3]

template<typename scalar_t >
void slate::internal::he2hb_trmm ( internal::TargetType< Target::Devices ,
HermitianMatrix< scalar_t > &  AH,
Matrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
std::vector< int64_t > &  panel_rank_rows,
int  priority,
int64_t  queue_index 
)

Triangular matrix multiply.

Device implementation.

◆ he2hb_trmm() [3/3]

template<typename scalar_t >
void slate::internal::he2hb_trmm ( internal::TargetType< Target::HostTask ,
HermitianMatrix< scalar_t > &  AH,
Matrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
std::vector< int64_t > &  panel_rank_rows,
int  priority,
int64_t  queue_index 
)

Triangular matrix multiply.

Host OpenMP task implementation.

◆ hettmqr() [1/2]

template<typename scalar_t >
void slate::internal::hettmqr ( internal::TargetType< Target::HostTask ,
Op  op,
Matrix< scalar_t > &  V,
Matrix< scalar_t > &  T,
HermitianMatrix< scalar_t > &  C,
int  tag_base 
)

Distributed multiply Hermitian matrix on left and right by Q from QR triangle-triangle factorization of column of tiles.

Host implementation.

Parameters
[in]tag_baseThis process uses MPI tags from the range [tag_base, tag_base+mt*nt)

◆ hettmqr() [2/2]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::hettmqr ( Op  op,
Matrix< scalar_t > &&  V,
Matrix< scalar_t > &&  T,
HermitianMatrix< scalar_t > &&  C,
int  tag 
)

Distributed multiply Hermitian matrix on left and right by Q from QR triangle-triangle factorization of column of tiles.

Dispatches to target implementations. todo: This assumes A and T have already been communicated as needed. However, it necessarily handles communication for C. Tag is used in geqrf to differentiate communication for look-ahead panel from rest of trailing matrix.

◆ unmtr_hb2st()

template<Target target, typename scalar_t >
void slate::internal::unmtr_hb2st ( internal::TargetType< target >  ,
Side  side,
Op  op,
Matrix< scalar_t > &  V,
Matrix< scalar_t > &  C 
)

Generic implementation of unmtr_hb2st.

SLATE Working Note 13: Implementing Singular Value and Symmetric/Hermitian Eigenvalue Solvers, SLATE Working Notes, no. 13. https://www.icl.utk.edu/publications/swan-013