BLAS++ 2024.05.31
BLAS C++ API
Loading...
Searching...
No Matches
hemm: Hermitian matrix multiply

Functions

template<typename scalar_t >
void blas::impl::hemm (blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< scalar_t > const &alpha, std::vector< scalar_t * > const &Aarray, std::vector< int64_t > const &lda, std::vector< scalar_t * > const &Barray, std::vector< int64_t > const &ldb, std::vector< scalar_t > const &beta, std::vector< scalar_t * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info)
 CPU, variable-size batched version.
 
template<typename scalar_t >
void blas::impl::hemm (blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< scalar_t > const &alpha, std::vector< scalar_t * > const &Aarray, std::vector< int64_t > const &lda, std::vector< scalar_t * > const &Barray, std::vector< int64_t > const &ldb, std::vector< scalar_t > const &beta, std::vector< scalar_t * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
 GPU device, variable-size batched version.
 
template<typename scalar_t >
void blas::impl::hemm (blas::Layout layout, blas::Side side, blas::Uplo uplo, int64_t m, int64_t n, scalar_t alpha, scalar_t const *A, int64_t lda, scalar_t const *B, int64_t ldb, scalar_t beta, scalar_t *C, int64_t ldc, blas::Queue &queue)
 GPU device version. Mid-level templated wrapper that checks and converts arguments, then calls the low-level wrapper.
 
void blas::internal::hemm (char side, char uplo, blas_int m, blas_int n, std::complex< float > alpha, std::complex< float > const *A, blas_int lda, std::complex< float > const *B, blas_int ldb, std::complex< float > beta, std::complex< float > *C, blas_int ldc)
 Low-level overload wrapper that calls Fortran; complex<float> version.
 
void blas::internal::hemm (char side, char uplo, blas_int m, blas_int n, std::complex< double > alpha, std::complex< double > const *A, blas_int lda, std::complex< double > const *B, blas_int ldb, std::complex< double > beta, std::complex< double > *C, blas_int ldc)
 Low-level overload wrapper that calls Fortran; complex<double> version.
 
template<typename scalar_t >
void blas::impl::hemm (blas::Layout layout, blas::Side side, blas::Uplo uplo, int64_t m, int64_t n, scalar_t alpha, scalar_t const *A, int64_t lda, scalar_t const *B, int64_t ldb, scalar_t beta, scalar_t *C, int64_t ldc)
 CPU version. Mid-level templated wrapper that checks and converts arguments, then calls the low-level wrapper.
 

Detailed Description

Function Documentation

◆ hemm() [1/2]

template<typename scalar_t >
void blas::impl::hemm ( blas::Layout  layout,
std::vector< blas::Side > const &  side,
std::vector< blas::Uplo > const &  uplo,
std::vector< int64_t > const &  m,
std::vector< int64_t > const &  n,
std::vector< scalar_t > const &  alpha,
std::vector< scalar_t * > const &  Aarray,
std::vector< int64_t > const &  lda,
std::vector< scalar_t * > const &  Barray,
std::vector< int64_t > const &  ldb,
std::vector< scalar_t > const &  beta,
std::vector< scalar_t * > const &  Carray,
std::vector< int64_t > const &  ldc,
size_t  batch_size,
std::vector< int64_t > &  info 
)

CPU, variable-size batched version.

Mid-level templated wrapper that checks and converts arguments, then makes the individual routine calls in parallel.

◆ hemm() [2/2]

template<typename scalar_t >
void blas::impl::hemm ( blas::Layout  layout,
std::vector< blas::Side > const &  side,
std::vector< blas::Uplo > const &  uplo,
std::vector< int64_t > const &  m,
std::vector< int64_t > const &  n,
std::vector< scalar_t > const &  alpha,
std::vector< scalar_t * > const &  Aarray,
std::vector< int64_t > const &  lda,
std::vector< scalar_t * > const &  Barray,
std::vector< int64_t > const &  ldb,
std::vector< scalar_t > const &  beta,
std::vector< scalar_t * > const &  Carray,
std::vector< int64_t > const &  ldc,
size_t  batch_size,
std::vector< int64_t > &  info,
blas::Queue &  queue 
)

GPU device, variable-size batched version.

Mid-level templated wrapper that checks and converts arguments, then makes the individual routine calls in parallel.