SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
|
Functions | |
template<Target target = Target::HostTask, typename scalar_t > | |
void | slate::internal::trsm (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, int priority, Layout layout, int64_t queue_index) |
Triangular solve matrix (multiple right-hand sides). | |
template<typename scalar_t > | |
void | slate::internal::trsm (internal::TargetType< Target::HostTask >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index) |
Triangular solve matrix (multiple right-hand sides). | |
template<typename scalar_t > | |
void | slate::internal::trsm (internal::TargetType< Target::HostNest >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index) |
Triangular solve matrix (multiple right-hand sides). | |
template<typename scalar_t > | |
void | slate::internal::trsm (internal::TargetType< Target::HostBatch >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index) |
Triangular solve matrix (multiple right-hand sides). | |
template<typename scalar_t > | |
void | slate::internal::trsm (internal::TargetType< Target::Devices >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index) |
Triangular solve matrix (multiple right-hand sides). | |
template<Target target = Target::HostTask, typename scalar_t > | |
void | slate::internal::trsmA (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, int priority, Layout layout, int64_t queue_index) |
Triangular solve matrix (multiple right-hand sides). | |
template<typename scalar_t > | |
void | slate::internal::trsmA (internal::TargetType< Target::HostTask >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index) |
Triangular solve matrix (multiple right-hand sides). | |
template<typename scalar_t > | |
void | slate::internal::trsmA (internal::TargetType< Target::HostNest >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index) |
Triangular solve matrix (multiple right-hand sides). | |
template<typename scalar_t > | |
void | slate::internal::trsmA (internal::TargetType< Target::HostBatch >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index) |
Triangular solve matrix (multiple right-hand sides). | |
template<typename scalar_t > | |
void | slate::internal::trsmA (internal::TargetType< Target::Devices >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index) |
Triangular solve matrix (multiple right-hand sides). | |
template<Target target = Target::HostTask, typename scalar_t > | |
void | slate::work::trsm (Side side, scalar_t alpha, TriangularMatrix< scalar_t > A, Matrix< scalar_t > B, uint8_t *row, Options const &opts) |
Triangular solve matrix (multiple right-hand sides). | |
template<Target target = Target::HostTask, typename scalar_t > | |
void | slate::work::trsmA (Side side, scalar_t alpha, TriangularMatrix< scalar_t > A, Matrix< scalar_t > B, uint8_t *row, Options const &opts) |
Triangular solve matrix (multiple right-hand sides). | |
void slate::internal::trsm | ( | internal::TargetType< Target::Devices > | , |
Side | side, | ||
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
int | priority, | ||
Layout | layout, | ||
int64_t | queue_index | ||
) |
Triangular solve matrix (multiple right-hand sides).
GPU device batched cuBLAS implementation.
void slate::internal::trsm | ( | internal::TargetType< Target::HostBatch > | , |
Side | side, | ||
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
int | priority, | ||
Layout | layout, | ||
int64_t | queue_index | ||
) |
Triangular solve matrix (multiple right-hand sides).
Host batched implementation.
void slate::internal::trsm | ( | internal::TargetType< Target::HostNest > | , |
Side | side, | ||
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
int | priority, | ||
Layout | layout, | ||
int64_t | queue_index | ||
) |
Triangular solve matrix (multiple right-hand sides).
Host nested OpenMP implementation.
void slate::internal::trsm | ( | internal::TargetType< Target::HostTask > | , |
Side | side, | ||
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
int | priority, | ||
Layout | layout, | ||
int64_t | queue_index | ||
) |
Triangular solve matrix (multiple right-hand sides).
Host OpenMP task implementation.
void slate::internal::trsm | ( | Side | side, |
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > && | A, | ||
Matrix< scalar_t > && | B, | ||
int | priority, | ||
Layout | layout, | ||
int64_t | queue_index | ||
) |
Triangular solve matrix (multiple right-hand sides).
Dispatches to target implementations.
void slate::work::trsm | ( | Side | side, |
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > | A, | ||
Matrix< scalar_t > | B, | ||
uint8_t * | row, | ||
Options const & | opts | ||
) |
Triangular solve matrix (multiple right-hand sides).
Note that A and B are passed by value, so we can transpose them if needed (for side = right) without affecting the caller.
target | One of HostTask, HostNest, HostBatch, Devices. |
scalar_t | One of float, double, std::complex<float>, std::complex<double>. |
[in] | side | Whether A appears on the left or on the right of X:
- Side::Left: solve \(A X = \alpha B\)
- Side::Right: solve \(X A = \alpha B\)
|
[in] | alpha | The scalar alpha. |
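[in] | A |
- If side = left, the m-by-m triangular matrix A;
- if side = right, the n-by-n triangular matrix A.
|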
[in,out] | B | On entry, the m-by-n matrix B. On exit, overwritten by the result X. |
[in] | row | A raw pointer to the data of a dummy vector. The dummy vector is used for OpenMP dependency tracking, which is not based on the actual matrix data. Each entry in the dummy vector represents a row of matrix \(B\). The size of row should be the number of block columns of matrix \(A\). |
[in] | lookahead | Number of blocks to overlap communication and computation. lookahead >= 0. Default 1. |
void slate::internal::trsmA | ( | internal::TargetType< Target::Devices > | , |
Side | side, | ||
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
int | priority, | ||
Layout | layout, | ||
int64_t | queue_index | ||
) |
Triangular solve matrix (multiple right-hand sides).
GPU device batched cuBLAS implementation.
void slate::internal::trsmA | ( | internal::TargetType< Target::HostBatch > | , |
Side | side, | ||
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
int | priority, | ||
Layout | layout, | ||
int64_t | queue_index | ||
) |
Triangular solve matrix (multiple right-hand sides).
Host batched implementation.
void slate::internal::trsmA | ( | internal::TargetType< Target::HostNest > | , |
Side | side, | ||
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
int | priority, | ||
Layout | layout, | ||
int64_t | queue_index | ||
) |
Triangular solve matrix (multiple right-hand sides).
Host nested OpenMP implementation.
void slate::internal::trsmA | ( | internal::TargetType< Target::HostTask > | , |
Side | side, | ||
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
int | priority, | ||
Layout | layout, | ||
int64_t | queue_index | ||
) |
Triangular solve matrix (multiple right-hand sides).
Host OpenMP task implementation.
void slate::internal::trsmA | ( | Side | side, |
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > && | A, | ||
Matrix< scalar_t > && | B, | ||
int | priority, | ||
Layout | layout, | ||
int64_t | queue_index | ||
) |
Triangular solve matrix (multiple right-hand sides).
We assume that A is a single tile, that the calling rank owns it, and that the calling rank also has [a copy of] all the tiles in B. Dispatches to target implementations.
void slate::work::trsmA | ( | Side | side, |
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > | A, | ||
Matrix< scalar_t > | B, | ||
uint8_t * | row, | ||
Options const & | opts | ||
) |
Triangular solve matrix (multiple right-hand sides).
Note that A and B are passed by value, so we can transpose them if needed (for side = right) without affecting the caller.
target | One of HostTask, HostNest, HostBatch, Devices. |
scalar_t | One of float, double, std::complex<float>, std::complex<double>. |
[in] | side | Whether A appears on the left or on the right of X:
- Side::Left: solve \(A X = \alpha B\)
- Side::Right: solve \(X A = \alpha B\)
|
[in] | alpha | The scalar alpha. |
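[in] | A |
- If side = left, the m-by-m triangular matrix A;
- if side = right, the n-by-n triangular matrix A.
|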
[in,out] | B | On entry, the m-by-n matrix B. On exit, overwritten by the result X. |
[in] | row | A raw pointer to the data of a dummy vector. The dummy vector is used for OpenMP dependency tracking, which is not based on the actual matrix data. Each entry in the dummy vector represents a row of matrix \(B\). The size of row should be the number of block columns of matrix \(A\). |
[in] | lookahead | Number of blocks to overlap communication and computation. lookahead >= 0. Default 1. |