SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
Loading...
Searching...
No Matches
trmm: Triangular matrix multiply

Functions

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::trmm (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<typename scalar_t >
void slate::internal::trmm (internal::TargetType< Target::HostTask >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<typename scalar_t >
void slate::internal::trmm (internal::TargetType< Target::HostNest >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<typename scalar_t >
void slate::internal::trmm (internal::TargetType< Target::HostBatch >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<typename scalar_t >
void slate::internal::trmm (internal::TargetType< Target::Devices >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<Target target = Target::HostTask, typename scalar_t >
void slate::work::trmm (Side side, scalar_t alpha, TriangularMatrix< scalar_t > A, Matrix< scalar_t > B, uint8_t *bcast, uint8_t *gemm, int64_t lookahead)
 Triangular matrix multiply.
 

Detailed Description

Function Documentation

◆ trmm() [1/6]

template<typename scalar_t >
void slate::internal::trmm ( internal::TargetType< Target::Devices >,
Side  side,
scalar_t  alpha,
TriangularMatrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
int  priority,
int64_t  queue_index 
)

Triangular matrix multiply.

GPU device batched cuBLAS implementation.

◆ trmm() [2/6]

template<typename scalar_t >
void slate::internal::trmm ( internal::TargetType< Target::HostBatch >,
Side  side,
scalar_t  alpha,
TriangularMatrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
int  priority,
int64_t  queue_index 
)

Triangular matrix multiply.

Host batched implementation.

◆ trmm() [3/6]

template<typename scalar_t >
void slate::internal::trmm ( internal::TargetType< Target::HostNest >,
Side  side,
scalar_t  alpha,
TriangularMatrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
int  priority,
int64_t  queue_index 
)

Triangular matrix multiply.

Host nested OpenMP implementation.

◆ trmm() [4/6]

template<typename scalar_t >
void slate::internal::trmm ( internal::TargetType< Target::HostTask >,
Side  side,
scalar_t  alpha,
TriangularMatrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
int  priority,
int64_t  queue_index 
)

Triangular matrix multiply.

Host OpenMP task implementation.

◆ trmm() [5/6]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::trmm ( Side  side,
scalar_t  alpha,
TriangularMatrix< scalar_t > &&  A,
Matrix< scalar_t > &&  B,
int  priority,
int64_t  queue_index 
)

Triangular matrix multiply.

Dispatches to target implementations.

◆ trmm() [6/6]

template<Target target = Target::HostTask, typename scalar_t >
void slate::work::trmm ( Side  side,
scalar_t  alpha,
TriangularMatrix< scalar_t >  A,
Matrix< scalar_t >  B,
uint8_t *  bcast,
uint8_t *  gemm,
int64_t  lookahead 
)

Triangular matrix multiply.

Note A and B are passed by value, so we can transpose if needed (for side = right) without affecting caller.

Template Parameters
targetOne of HostTask, HostNest, HostBatch, Devices.
scalar_tOne of float, double, std::complex<float>, std::complex<double>.
Parameters
[in]sideWhether A appears on the left or on the right of B:
  • Side::Left: \(B = \alpha A B\)
  • Side::Right: \(B = \alpha B A\)
[in]alphaThe scalar alpha.
[in]A
  • If side = left, the m-by-m triangular matrix A;
  • if side = right, the n-by-n triangular matrix A.
[in,out]BOn entry, the m-by-n matrix B. On exit, overwritten by the result \(\alpha A B\) or \(\alpha B A\).
[in]bcastA raw pointer to dummy vector data. The dummy vector is used for OpenMP dependency tracking, not based on the actual data. Entries in the dummy vector represent each column of matrix \(A\) and each row of matrix \(B\). The size of bcast should be the number of block columns of matrix \(A\).
[in]gemmA raw pointer to dummy vector data. The dummy vector is used for OpenMP dependency tracking, not based on the actual data. Entries in the dummy vector represent each column of matrix \(A\) and each row of matrix \(B\). The size of gemm should be the number of block columns of matrix \(A\).
[in]lookaheadNumber of blocks to overlap communication and computation. lookahead >= 0. Default 1.