BLAS++ 2024.05.31
BLAS C++ API
|
Functions | |
template<typename scalar_t > | |
void | blas::impl::her2k (blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< scalar_t > const &alpha, std::vector< scalar_t * > const &Aarray, std::vector< int64_t > const &lda, std::vector< scalar_t * > const &Barray, std::vector< int64_t > const &ldb, std::vector< real_type< scalar_t > > const &beta, std::vector< scalar_t * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info) |
CPU, variable-size batched version. | |
template<typename scalar_t > | |
void | blas::impl::her2k (blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< scalar_t > const &alpha, std::vector< scalar_t * > const &Aarray, std::vector< int64_t > const &lda, std::vector< scalar_t * > const &Barray, std::vector< int64_t > const &ldb, std::vector< real_type< scalar_t > > const &beta, std::vector< scalar_t * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue) |
GPU device, variable-size batched version. | |
template<typename scalar_t > | |
void | blas::impl::her2k (blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, scalar_t alpha, scalar_t const *A, int64_t lda, scalar_t const *B, int64_t ldb, blas::real_type< scalar_t > beta, scalar_t *C, int64_t ldc, blas::Queue &queue) |
Mid-level templated wrapper that checks and converts arguments, then calls the low-level wrapper. | |
void | blas::internal::her2k (char uplo, char trans, blas_int n, blas_int k, std::complex< float > alpha, std::complex< float > const *A, blas_int lda, std::complex< float > const *B, blas_int ldb, float beta, std::complex< float > *C, blas_int ldc) |
Low-level overload wrapper that calls Fortran; complex<float> version. | |
void | blas::internal::her2k (char uplo, char trans, blas_int n, blas_int k, std::complex< double > alpha, std::complex< double > const *A, blas_int lda, std::complex< double > const *B, blas_int ldb, double beta, std::complex< double > *C, blas_int ldc) |
Low-level overload wrapper that calls Fortran; complex<double> version. | |
template<typename scalar_t > | |
void | blas::impl::her2k (blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, scalar_t alpha, scalar_t const *A, int64_t lda, scalar_t const *B, int64_t ldb, blas::real_type< scalar_t > beta, scalar_t *C, int64_t ldc) |
Mid-level templated wrapper that checks and converts arguments, then calls the low-level wrapper. | |
void blas::impl::her2k | ( | blas::Layout | layout, |
std::vector< blas::Uplo > const & | uplo, | ||
std::vector< blas::Op > const & | trans, | ||
std::vector< int64_t > const & | n, | ||
std::vector< int64_t > const & | k, | ||
std::vector< scalar_t > const & | alpha, | ||
std::vector< scalar_t * > const & | Aarray, | ||
std::vector< int64_t > const & | lda, | ||
std::vector< scalar_t * > const & | Barray, | ||
std::vector< int64_t > const & | ldb, | ||
std::vector< real_type< scalar_t > > const & | beta, | ||
std::vector< scalar_t * > const & | Carray, | ||
std::vector< int64_t > const & | ldc, | ||
size_t | batch_size, | ||
std::vector< int64_t > & | info | ||
) |
CPU, variable-size batched version.
Mid-level templated wrapper that checks and converts arguments, then makes the individual routine calls in parallel.
void blas::impl::her2k | ( | blas::Layout | layout, |
std::vector< blas::Uplo > const & | uplo, | ||
std::vector< blas::Op > const & | trans, | ||
std::vector< int64_t > const & | n, | ||
std::vector< int64_t > const & | k, | ||
std::vector< scalar_t > const & | alpha, | ||
std::vector< scalar_t * > const & | Aarray, | ||
std::vector< int64_t > const & | lda, | ||
std::vector< scalar_t * > const & | Barray, | ||
std::vector< int64_t > const & | ldb, | ||
std::vector< real_type< scalar_t > > const & | beta, | ||
std::vector< scalar_t * > const & | Carray, | ||
std::vector< int64_t > const & | ldc, | ||
size_t | batch_size, | ||
std::vector< int64_t > & | info, | ||
blas::Queue & | queue | ||
) |
GPU device, variable-size batched version.
Mid-level templated wrapper that checks and converts arguments, then makes the individual routine calls in parallel.