BLAS++ 2024.05.31
BLAS C++ API
Loading...
Searching...
No Matches
trmm: Triangular matrix multiply

\(B = \alpha \;op(A)\; B\) or \(B = \alpha B \;op(A)\) where \(A\) is triangular More...

Functions

template<typename TA , typename TB >
void blas::trmm (blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, blas::scalar_type< TA, TB > alpha, TA const *A, int64_t lda, TB *B, int64_t ldb)
 Triangular matrix-matrix multiply: \(B = \alpha \;op(A)\; B\) or \(B = \alpha B \;op(A)\), where \(A\) is triangular.
 
void blas::batch::trmm (blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info)
 CPU, variable-size batched, float version.
 
void blas::batch::trmm (blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< double > const &alpha, std::vector< double * > const &Aarray, std::vector< int64_t > const &lda, std::vector< double * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info)
 CPU, variable-size batched, double version.
 
void blas::batch::trmm (blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< std::complex< float > > const &alpha, std::vector< std::complex< float > * > const &Aarray, std::vector< int64_t > const &lda, std::vector< std::complex< float > * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info)
 CPU, variable-size batched, complex<float> version.
 
void blas::batch::trmm (blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< std::complex< double > > const &alpha, std::vector< std::complex< double > * > const &Aarray, std::vector< int64_t > const &lda, std::vector< std::complex< double > * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info)
 CPU, variable-size batched, complex<double> version.
 
void blas::batch::trmm (blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
 GPU device, variable-size batched, float version.
 
void blas::batch::trmm (blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< double > const &alpha, std::vector< double * > const &Aarray, std::vector< int64_t > const &lda, std::vector< double * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
 GPU device, variable-size batched, double version.
 
void blas::batch::trmm (blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< std::complex< float > > const &alpha, std::vector< std::complex< float > * > const &Aarray, std::vector< int64_t > const &lda, std::vector< std::complex< float > * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
 GPU device, variable-size batched, complex<float> version.
 
void blas::batch::trmm (blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< std::complex< double > > const &alpha, std::vector< std::complex< double > * > const &Aarray, std::vector< int64_t > const &lda, std::vector< std::complex< double > * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
 GPU device, variable-size batched, complex<double> version.
 
void blas::trmm (blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float *B, int64_t ldb, blas::Queue &queue)
 GPU device, float version.
 
void blas::trmm (blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, double alpha, double const *A, int64_t lda, double *B, int64_t ldb, blas::Queue &queue)
 GPU device, double version.
 
void blas::trmm (blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, std::complex< float > alpha, std::complex< float > const *A, int64_t lda, std::complex< float > *B, int64_t ldb, blas::Queue &queue)
 GPU device, complex<float> version.
 
void blas::trmm (blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, std::complex< double > alpha, std::complex< double > const *A, int64_t lda, std::complex< double > *B, int64_t ldb, blas::Queue &queue)
 GPU device, complex<double> version.
 
void blas::trmm (blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float *B, int64_t ldb)
 CPU, float version.
 
void blas::trmm (blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, double alpha, double const *A, int64_t lda, double *B, int64_t ldb)
 CPU, double version.
 
void blas::trmm (blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, std::complex< float > alpha, std::complex< float > const *A, int64_t lda, std::complex< float > *B, int64_t ldb)
 CPU, complex<float> version.
 
void blas::trmm (blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, std::complex< double > alpha, std::complex< double > const *A, int64_t lda, std::complex< double > *B, int64_t ldb)
 CPU, complex<double> version.
 

Detailed Description

\(B = \alpha \;op(A)\; B\) or \(B = \alpha B \;op(A)\) where \(A\) is triangular

Function Documentation

◆ trmm()

template<typename TA , typename TB >
void blas::trmm ( blas::Layout  layout,
blas::Side  side,
blas::Uplo  uplo,
blas::Op  trans,
blas::Diag  diag,
int64_t  m,
int64_t  n,
blas::scalar_type< TA, TB >  alpha,
TA const *  A,
int64_t  lda,
TB *  B,
int64_t  ldb 
)

Triangular matrix-matrix multiply:

\[ B = \alpha op(A) B, \]

or

\[ B = \alpha B op(A), \]

where \(op(A)\) is one of \(op(A) = A\), \(op(A) = A^T\), or \(op(A) = A^H\), B is an m-by-n matrix, and A is an m-by-m or n-by-n, unit or non-unit, upper or lower triangular matrix.

Generic implementation for arbitrary data types.

Parameters
[in] layout: Matrix storage, Layout::ColMajor or Layout::RowMajor.
[in] side: Whether \(op(A)\) is on the left or right of B:
  • Side::Left: \(B = \alpha op(A) B\).
  • Side::Right: \(B = \alpha B op(A)\).
[in] uplo: What part of the matrix A is referenced, the opposite triangle being assumed to be zero:
  • Uplo::Lower: A is lower triangular.
  • Uplo::Upper: A is upper triangular.
[in] trans: The form of \(op(A)\):
  • Op::NoTrans: \(op(A) = A\).
  • Op::Trans: \(op(A) = A^T\).
  • Op::ConjTrans: \(op(A) = A^H\).
[in] diag: Whether A has a unit or non-unit diagonal:
  • Diag::Unit: A is assumed to be unit triangular.
  • Diag::NonUnit: A is not assumed to be unit triangular.
[in] m: Number of rows of matrix B. m >= 0.
[in] n: Number of columns of matrix B. n >= 0.
[in] alpha: Scalar alpha. If alpha is zero, A is not accessed.
[in] A: The triangular matrix A, whose size depends on side:
  • If side = Left: the m-by-m matrix A, stored in an lda-by-m array [RowMajor: m-by-lda].
  • If side = Right: the n-by-n matrix A, stored in an lda-by-n array [RowMajor: n-by-lda].
[in] lda: Leading dimension of A.
  • If side = Left: lda >= max(1, m).
  • If side = Right: lda >= max(1, n).
[in,out] B: The m-by-n matrix B, stored in an ldb-by-n array [RowMajor: m-by-ldb].
[in] ldb: Leading dimension of B. ldb >= max(1, m) [RowMajor: ldb >= max(1, n)].