BLAS++ 2024.05.31
BLAS C++ API
Functions
template<typename scalar_t >
void blas::impl::trsm (blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< scalar_t > const &alpha, std::vector< scalar_t * > const &Aarray, std::vector< int64_t > const &lda, std::vector< scalar_t * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info)
CPU, variable-size batched version.
template<typename scalar_t >
void blas::impl::trsm (blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< scalar_t > const &alpha, std::vector< scalar_t * > const &Aarray, std::vector< int64_t > const &lda, std::vector< scalar_t * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched version.
template<typename scalar_t >
void blas::impl::trsm (blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, scalar_t alpha, scalar_t const *A, int64_t lda, scalar_t *B, int64_t ldb, blas::Queue &queue)
Mid-level templated wrapper checks and converts arguments, then calls low-level wrapper.
void blas::internal::trsm (char side, char uplo, char trans, char diag, blas_int m, blas_int n, float alpha, float const *A, blas_int lda, float *B, blas_int ldb)
Low-level overload wrapper calls Fortran, float version.
void blas::internal::trsm (char side, char uplo, char trans, char diag, blas_int m, blas_int n, double alpha, double const *A, blas_int lda, double *B, blas_int ldb)
Low-level overload wrapper calls Fortran, double version.
void blas::internal::trsm (char side, char uplo, char trans, char diag, blas_int m, blas_int n, std::complex< float > alpha, std::complex< float > const *A, blas_int lda, std::complex< float > *B, blas_int ldb)
Low-level overload wrapper calls Fortran, complex<float> version.
void blas::internal::trsm (char side, char uplo, char trans, char diag, blas_int m, blas_int n, std::complex< double > alpha, std::complex< double > const *A, blas_int lda, std::complex< double > *B, blas_int ldb)
Low-level overload wrapper calls Fortran, complex<double> version.
template<typename scalar_t >
void blas::impl::trsm (blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, scalar_t alpha, scalar_t const *A, int64_t lda, scalar_t *B, int64_t ldb)
Mid-level templated wrapper checks and converts arguments, then calls low-level wrapper.
template<typename scalar_t >
void blas::impl::trsm (
    blas::Layout layout,
    std::vector< blas::Side > const &side,
    std::vector< blas::Uplo > const &uplo,
    std::vector< blas::Op > const &trans,
    std::vector< blas::Diag > const &diag,
    std::vector< int64_t > const &m,
    std::vector< int64_t > const &n,
    std::vector< scalar_t > const &alpha,
    std::vector< scalar_t * > const &Aarray,
    std::vector< int64_t > const &lda,
    std::vector< scalar_t * > const &Barray,
    std::vector< int64_t > const &ldb,
    size_t batch_size,
    std::vector< int64_t > &info
)
CPU, variable-size batched version.
Mid-level templated wrapper checks and converts arguments, then makes individual routine calls in parallel.
template<typename scalar_t >
void blas::impl::trsm (
    blas::Layout layout,
    std::vector< blas::Side > const &side,
    std::vector< blas::Uplo > const &uplo,
    std::vector< blas::Op > const &trans,
    std::vector< blas::Diag > const &diag,
    std::vector< int64_t > const &m,
    std::vector< int64_t > const &n,
    std::vector< scalar_t > const &alpha,
    std::vector< scalar_t * > const &Aarray,
    std::vector< int64_t > const &lda,
    std::vector< scalar_t * > const &Barray,
    std::vector< int64_t > const &ldb,
    size_t batch_size,
    std::vector< int64_t > &info,
    blas::Queue &queue
)
GPU device, variable-size batched version.
Mid-level templated wrapper checks and converts arguments, then makes individual routine calls in parallel.