SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
Loading...
Searching...
No Matches
slate::internal Namespace Reference

Namespace used for SLATE internal implementation. More...

Classes

class  Array2D
 Very simple 2D array. More...
 
struct  device_regions_params
 Helper class to store the information on a device region. More...
 
class  DevVector
 A simple vector class for GPU memory, loosely based on std::vector. More...
 
class  TargetType
 TargetType is used to overload functions, since there is no C++ partial specialization of functions, only of classes. More...
 

Functions

MPI_Comm commFromSet (const std::set< int > &bcast_set, MPI_Comm mpi_comm, MPI_Group mpi_group, const int in_rank, int &out_rank, int tag=0)
 
void cubeBcastPattern (int size, int rank, int radix, std::list< int > &recv_from, std::list< int > &send_to)
 [internal] Implements a hypercube broadcast pattern.
 
void cubeReducePattern (int size, int rank, int radix, std::list< int > &recv_from, std::list< int > &send_to)
 
template<typename scalar_t >
void gerbt_setup_bcast (Side side, Matrix< scalar_t > A, int64_t i1, int64_t i2, typename Matrix< scalar_t >::BcastListTag &bcast_list)
 
template<typename scalar_t >
void gerbt_bcast_filter_duplicates (typename Matrix< scalar_t >::BcastListTag &bcast_list)
 
void gerbt_iterate_2d (int64_t d, int64_t inner_len, int64_t mt, int64_t nt, std::function< void(int64_t, int64_t, int64_t, int64_t, int64_t, int64_t)> body)
 
void gerbt_iterate_1d (Op trans, int64_t d, int64_t inner_len, int64_t mt, std::function< void(int64_t, int64_t, int64_t)> body)
 
template<Target target = Target::HostTask, typename src_scalar_t , typename dst_scalar_t >
void copy (Matrix< src_scalar_t > &&A, Matrix< dst_scalar_t > &&B, int priority, int queue_index)
 Copy and precision conversion.
 
template<Target target = Target::HostTask, typename src_scalar_t , typename dst_scalar_t >
void copy (BaseTrapezoidMatrix< src_scalar_t > &&A, BaseTrapezoidMatrix< dst_scalar_t > &&B, int priority, int queue_index)
 Copy and precision conversion.
 
template<Target target = Target::HostTask, typename scalar_t >
void scale (blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, Matrix< scalar_t > &&A, int priority, int queue_index)
 Scale matrix entries by the real scalar numer/denom.
 
template<Target target = Target::HostTask, typename scalar_t >
void scale (blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, BaseTrapezoidMatrix< scalar_t > &&A, int priority, int queue_index)
 Scale Trapezoid matrix entries by the real scalar numer/denom.
 
template<Target target = Target::HostTask, typename scalar_t , typename scalar_t2 >
void scale_row_col (Equed equed, std::vector< scalar_t2 > const &R, std::vector< scalar_t2 > const &C, Matrix< scalar_t > &&A)
 Apply row or column scaling, or both, to a Matrix.
 
template<Target target = Target::HostTask, typename scalar_t >
void set (scalar_t offdiag_value, scalar_t diag_value, Matrix< scalar_t > &&A, int priority, int queue_index)
 General matrix set.
 
template<Target target = Target::HostTask, typename scalar_t >
void set (scalar_t offdiag_value, scalar_t diag_value, BaseTrapezoidMatrix< scalar_t > &&A, int priority, int queue_index)
 Trapezoid matrix set.
 
template<Target target = Target::HostTask, typename scalar_t >
void copytb2bd (TriangularBandMatrix< scalar_t > &A, std::vector< blas::real_type< scalar_t > > &D, std::vector< blas::real_type< scalar_t > > &E)
 Copy bi-diagonal TriangularBand matrix to two vectors.
 
template<Target target = Target::HostTask, typename scalar_t >
void copyhb2st (HermitianBandMatrix< scalar_t > &A, std::vector< blas::real_type< scalar_t > > &D, std::vector< blas::real_type< scalar_t > > &E)
 Copy tri-diagonal HermitianBand matrix to two vectors.
 
template<Target target = Target::HostTask, typename scalar_t >
void gemm (scalar_t alpha, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, Matrix< scalar_t > &&C, Layout layout, int priority, int64_t queue_index)
 General matrix multiply to update trailing matrix, where A is a single block column and B is a single block row.
 
template<Target target = Target::HostTask, typename scalar_t >
void gemmA (scalar_t alpha, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, Matrix< scalar_t > &&C, Layout layout, int priority, int64_t queue_index)
 General matrix multiply for a left-looking update.
 
template<Target target = Target::HostTask, typename scalar_t >
void hemm (Side side, scalar_t alpha, HermitianMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, Matrix< scalar_t > &&C, int priority)
 Hermitian matrix multiply to update trailing matrix, where A is a single tile.
 
template<Target target = Target::HostTask, typename scalar_t >
void hemm (Side side, scalar_t alpha, SymmetricMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, Matrix< scalar_t > &&C, int priority=0, enable_if_t< ! is_complex< scalar_t >::value > *=nullptr)
 
template<Target target = Target::HostTask, typename scalar_t >
void hemmA (Side side, scalar_t alpha, HermitianMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, Matrix< scalar_t > &&C, int priority)
 Hermitian matrix multiply to update trailing matrix, where A is a single tile.
 
template<Target target = Target::HostTask, typename scalar_t >
void herk (blas::real_type< scalar_t > alpha, Matrix< scalar_t > &&A, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > &&C, int priority, int queue_index, Layout layout)
 Hermitian rank-k update of single block column (i.e., k = nb).
 
template<Target target = Target::HostTask, typename scalar_t >
void herk (blas::real_type< scalar_t > alpha, Matrix< scalar_t > &&A, blas::real_type< scalar_t > beta, SymmetricMatrix< scalar_t > &&C, int priority=0, int queue_index=0, Layout layout=Layout::ColMajor, enable_if_t< ! is_complex< scalar_t >::value > *=nullptr)
 
template<Target target = Target::HostTask, typename scalar_t >
void her2k (scalar_t alpha, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > &&C, int priority, int queue_index, Layout layout)
 Hermitian rank-2k update of single block column (i.e., k = nb).
 
template<Target target = Target::HostTask, typename scalar_t >
void her2k (scalar_t alpha, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, blas::real_type< scalar_t > beta, SymmetricMatrix< scalar_t > &&C, int priority=0, int queue_index=0, Layout layout=Layout::ColMajor, enable_if_t< ! is_complex< scalar_t >::value > *=nullptr)
 
template<Target target = Target::HostTask, typename scalar_t >
void symm (Side side, scalar_t alpha, SymmetricMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, Matrix< scalar_t > &&C, int priority)
 Symmetric matrix multiply to update trailing matrix, where A is a single tile.
 
template<Target target = Target::HostTask, typename scalar_t >
void symm (Side side, scalar_t alpha, HermitianMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, Matrix< scalar_t > &&C, int priority=0, enable_if_t< ! is_complex< scalar_t >::value > *=nullptr)
 
template<Target target = Target::HostTask, typename scalar_t >
void syrk (scalar_t alpha, Matrix< scalar_t > &&A, scalar_t beta, SymmetricMatrix< scalar_t > &&C, int priority, int queue_index, Layout layout)
 Symmetric rank-k update of single block column (i.e., k = nb).
 
template<Target target = Target::HostTask, typename scalar_t >
void syrk (scalar_t alpha, Matrix< scalar_t > &&A, scalar_t beta, HermitianMatrix< scalar_t > &&C, int priority=0, int queue_index=0, Layout layout=Layout::ColMajor, enable_if_t< ! is_complex< scalar_t >::value > *=nullptr)
 
template<Target target = Target::HostTask, typename scalar_t >
void syr2k (scalar_t alpha, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, SymmetricMatrix< scalar_t > &&C, int priority, int queue_index, Layout layout)
 Symmetric rank-2k update of single block column (i.e., k = nb).
 
template<Target target = Target::HostTask, typename scalar_t >
void syr2k (scalar_t alpha, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, HermitianMatrix< scalar_t > &&C, int priority=0, int queue_index=0, Layout layout=Layout::ColMajor, enable_if_t< ! is_complex< scalar_t >::value > *=nullptr)
 
template<Target target = Target::HostTask, typename scalar_t >
void trmm (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<Target target = Target::HostTask, typename scalar_t >
void trsm (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, int priority, Layout layout, int64_t queue_index)
 Triangular solve matrix (multiple right-hand sides).
 
template<Target target = Target::HostTask, typename scalar_t >
void trsmA (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, int priority, Layout layout, int64_t queue_index)
 Triangular solve matrix (multiple right-hand sides).
 
template<Target target = Target::HostTask, typename scalar_t >
void trtri (TriangularMatrix< scalar_t > &&A, int priority)
 Triangular inversion of single tile.
 
template<Target target = Target::HostTask, typename scalar_t >
void trtrm (TriangularMatrix< scalar_t > &&A, int priority)
 Triangular matrix multiply of single tile (a multiply, not an inversion as in trtri). todo: update docs.
 
template<Target target = Target::HostTask, typename scalar_t >
void permuteRows (Direction direction, Matrix< scalar_t > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag_base, int queue_index)
 Permutes rows according to the pivot vector.
 
template<Target target = Target::HostTask, typename scalar_t >
void permuteRowsCols (Direction direction, HermitianMatrix< scalar_t > &&A, std::vector< Pivot > &pivot, int priority, int tag)
 Permutes rows and columns symmetrically according to the pivot vector.
 
template<Target target = Target::HostTask, typename scalar_t >
void add (scalar_t alpha, Matrix< scalar_t > &&A, scalar_t beta, Matrix< scalar_t > &&B, int priority, int queue_index)
 General matrix add.
 
template<Target target = Target::HostTask, typename scalar_t >
void add (scalar_t alpha, BaseTrapezoidMatrix< scalar_t > &&A, scalar_t beta, BaseTrapezoidMatrix< scalar_t > &&B, int priority, int queue_index)
 Trapezoidal matrix add.
 
template<typename scalar_t >
void gerbt (Matrix< scalar_t > A11, Matrix< scalar_t > A12, Matrix< scalar_t > A21, Matrix< scalar_t > A22, Matrix< scalar_t > U1, Matrix< scalar_t > U2, Matrix< scalar_t > V1, Matrix< scalar_t > V2)
 Applies a single butterfly matrix to each side of A.
 
template<typename scalar_t >
void gerbt (Side side, Op trans, Matrix< scalar_t > B1, Matrix< scalar_t > B2, Matrix< scalar_t > U1, Matrix< scalar_t > U2)
 Applies a single butterfly matrix to one side of B.
 
template<typename scalar_t >
std::pair< Matrix< scalar_t >, Matrix< scalar_t > > rbt_generate (const Matrix< scalar_t > &A, const int64_t d, const int64_t seed)
 Constructs two random butterfly matrices in packed storage to transform the given matrix.
 
template<Target target, typename scalar_t >
void gebr1 (Matrix< scalar_t > &&A, int64_t n1, scalar_t *v1, int64_t n2, scalar_t *v2, int priority)
 Implements task 1 in the bidiagonal bulge chasing algorithm.
 
template<Target target, typename scalar_t >
void gebr2 (int64_t n1, scalar_t *v1, Matrix< scalar_t > &&A, int64_t n2, scalar_t *v2, int priority)
 Implements task 2 in the bidiagonal bulge chasing algorithm.
 
template<Target target, typename scalar_t >
void gebr3 (int64_t n1, scalar_t *v1, Matrix< scalar_t > &&A, int64_t n2, scalar_t *v2, int priority)
 Implements task 3 in the bidiagonal bulge chasing algorithm.
 
template<Target target, typename scalar_t >
void hebr1 (int64_t n, scalar_t *v, HermitianMatrix< scalar_t > &&A, int priority)
 Implements task type 1 in the tridiagonal bulge chasing algorithm.
 
template<Target target, typename scalar_t >
void hebr2 (int64_t n1, scalar_t *v1, int64_t n2, scalar_t *v2, Matrix< scalar_t > &&A, int priority)
 Implements task type 2 in the tridiagonal bulge chasing algorithm.
 
template<Target target, typename scalar_t >
void hebr3 (int64_t n, scalar_t *v, HermitianMatrix< scalar_t > &&A, int priority)
 Implements task type 3 in the tridiagonal bulge chasing algorithm.
 
template<Target target = Target::HostTask, typename scalar_t >
void norm (Norm in_norm, NormScope scope, Matrix< scalar_t > &&A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 General matrix norm.
 
template<Target target = Target::HostTask, typename scalar_t >
void norm (Norm in_norm, NormScope scope, HermitianMatrix< scalar_t > &&A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Hermitian matrix norm.
 
template<Target target = Target::HostTask, typename scalar_t >
void norm (Norm in_norm, NormScope scope, SymmetricMatrix< scalar_t > &&A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Symmetric matrix norm.
 
template<Target target = Target::HostTask, typename scalar_t >
void norm (Norm in_norm, NormScope scope, TrapezoidMatrix< scalar_t > &&A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Trapezoid and triangular matrix norm.
 
template<Target target = Target::HostTask, typename scalar_t >
void norm (Norm in_norm, NormScope scope, BandMatrix< scalar_t > &&A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 General banded matrix norm.
 
template<Target target = Target::HostTask, typename scalar_t >
void norm (Norm in_norm, NormScope scope, HermitianBandMatrix< scalar_t > &&A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Hermitian banded matrix norm.
 
template<Target target = Target::HostTask, typename scalar_t >
void getrf_panel (Matrix< scalar_t > &&A, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, blas::real_type< scalar_t > pivot_threshold, int max_panel_threads, int priority, int tag, int64_t *info)
 LU factorization of a column of tiles.
 
template<Target target = Target::HostTask, typename scalar_t >
void getrf_nopiv (Matrix< scalar_t > &&A, int64_t ib, int priority, int64_t *info)
 LU factorization of single tile without pivoting.
 
template<Target target = Target::HostTask, typename scalar_t >
void getrf_tntpiv_panel (Matrix< scalar_t > &&A, Matrix< scalar_t > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 LU factorization of a column of tiles.
 
template<Target target = Target::HostTask, typename scalar_t >
void geqrf (Matrix< scalar_t > &&A, Matrix< scalar_t > &&T, std::vector< scalar_t * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 QR factorization of a column of tiles.
 
template<Target target = Target::HostTask, typename scalar_t >
void geqrf (Matrix< scalar_t > &&A, Matrix< scalar_t > &&T, int64_t ib, int max_panel_threads, int priority=0)
 
template<Target target = Target::HostTask, typename scalar_t >
void he2hb_hemm (HermitianMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, Matrix< scalar_t > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 Apply local reflectors on a Hermitian trailing submatrix.
 
template<Target target = Target::HostTask, typename scalar_t >
void he2hb_trmm (HermitianMatrix< scalar_t > &&AH, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<Target target = Target::HostTask, typename scalar_t >
void he2hb_gemm (scalar_t alpha, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, Matrix< scalar_t > &&C, int panel_rank, int priority, int64_t queue_index)
 Inner product C = AB to update a single block C, where A and B are single blocks.
 
template<Target target = Target::HostTask, typename scalar_t >
void he2hb_her2k_offdiag_ranks (scalar_t alpha, Matrix< scalar_t > &&A, Matrix< scalar_t > &&B, scalar_t beta, HermitianMatrix< scalar_t > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 Matrix multiply to update trailing matrix, except the diagonal tiles.
 
template<Target target = Target::HostTask, typename scalar_t >
void ttqrt (Matrix< scalar_t > &&A, Matrix< scalar_t > &&T)
 Distributed QR triangle-triangle factorization of column of tiles.
 
template<Target target = Target::HostTask, typename scalar_t >
void ttlqt (Matrix< scalar_t > &&A, Matrix< scalar_t > &&T)
 Distributed LQ triangle-triangle factorization of row of tiles.
 
template<Target target = Target::HostTask, typename scalar_t >
void ttmqr (Side side, Op op, Matrix< scalar_t > &&A, Matrix< scalar_t > &&T, Matrix< scalar_t > &&C, int tag)
 Distributed multiply matrix by Q from QR triangle-triangle factorization of column of tiles.
 
template<Target target = Target::HostTask, typename scalar_t >
void ttmlq (Side side, Op op, Matrix< scalar_t > &&A, Matrix< scalar_t > &&T, Matrix< scalar_t > &&C, int tag)
 Distributed multiply matrix by Q from LQ triangle-triangle factorization of row of tiles.
 
template<Target target = Target::HostTask, typename scalar_t >
void hettmqr (Op op, Matrix< scalar_t > &&V, Matrix< scalar_t > &&T, HermitianMatrix< scalar_t > &&C, int tag)
 Distributed multiply Hermitian matrix on left and right by Q from QR triangle-triangle factorization of column of tiles.
 
template<Target target = Target::HostTask, typename scalar_t >
void unmqr (Side side, Op op, Matrix< scalar_t > &&V, Matrix< scalar_t > &&T, Matrix< scalar_t > &&C, Matrix< scalar_t > &&W, int priority, int64_t queue_index)
 Multiply matrix by Q from local QR factorization.
 
template<Target target = Target::HostTask, typename scalar_t >
void unmlq (Side side, Op op, Matrix< scalar_t > &&V, Matrix< scalar_t > &&T, Matrix< scalar_t > &&C, Matrix< scalar_t > &&W, int priority, int64_t queue_index)
 Multiply matrix by Q from local LQ factorization.
 
template<Target target = Target::HostTask, typename scalar_t >
void unmtr_hb2st (Side side, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &C)
 
template<Target target = Target::HostTask, typename scalar_t >
void unmbr_tb2bd (Side side, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &C)
 
template<Target target = Target::HostTask, typename scalar_t >
int64_t potrf (HermitianMatrix< scalar_t > &&A, int priority, int64_t queue_index, lapack::device_info_int *device_info)
 Cholesky factorization of single tile.
 
template<Target target = Target::HostTask, typename scalar_t >
void hegst (int64_t itype, HermitianMatrix< scalar_t > &&A, HermitianMatrix< scalar_t > &&B)
 Reduces a complex Hermitian positive-definite generalized eigenvalue problem to standard form, for a single tile.
 
template<typename scalar_t >
void norm1est (Matrix< scalar_t > &X, Matrix< scalar_t > &V, Matrix< int64_t > &isgn, blas::real_type< scalar_t > *est, int *kase, std::vector< int64_t > &isave)
 Distributed parallel estimate of the 1-norm of a square matrix A.
 
void reduce_info (int64_t *info, MPI_Comm mpi_comm)
 MPI reduce info, used in getrf, hetrf, etc.
 
template<typename scalar_t >
std::vector< int64_t > device_regions_range (RowCol dim, BaseMatrix< scalar_t > &A)
 Computes the range of tiles with either the same mb or the same nb.
 
template<bool store_diag, int mat_count, typename scalar_t , bool diag_same = !store_diag>
std::vector< device_regions_params< store_diag, mat_count > > device_regions_build (std::array< std::reference_wrapper< BaseMatrix< scalar_t > >, mat_count > mats, std::array< scalar_t **, mat_count > mats_array_host, int64_t device, std::function< void(int64_t, int64_t, int64_t)> extra_setup, std::vector< int64_t > &irange, std::vector< int64_t > &jrange)
 Computes and populates the regions for the given matrices.
 
template<bool store_diag, int mat_count, typename scalar_t , bool diag_same = !store_diag>
std::vector< device_regions_params< store_diag, mat_count > > device_regions_build (std::array< std::reference_wrapper< BaseMatrix< scalar_t > >, mat_count > mats, std::array< scalar_t **, mat_count > mats_array_host, int64_t device, std::function< void(int64_t, int64_t, int64_t)> extra_setup={})
 Computes and populates the regions for the given matrices.
 
template<typename real_t >
void copy_col (Matrix< real_t > &A, int64_t j, int64_t jj, real_t *x)
 Copy local rows of column from matrix A, tile j, column jj, to vector x.
 
template<typename real_t >
void copy_col (real_t *x, Matrix< real_t > &A, int64_t j, int64_t jj)
 Copy local rows of column from vector x to matrix A, tile j, column jj.
 
template<typename real_t >
void copy_col (Matrix< real_t > &A, int64_t j, int64_t jj, Matrix< real_t > &B, int64_t k, int64_t kk)
 Copy local rows of column from matrix A, tile j, column jj, to matrix B, tile k, column kk.
 
template<typename scalar_t >
void copyhb2st (internal::TargetType< Target::HostTask >, HermitianBandMatrix< scalar_t > A, std::vector< blas::real_type< scalar_t > > &D, std::vector< blas::real_type< scalar_t > > &E)
 Copy tri-diagonal HermitianBand matrix to two vectors.
 
template void copyhb2st< Target::HostTask, float > (HermitianBandMatrix< float > &A, std::vector< float > &D, std::vector< float > &E)
 
template void copyhb2st< Target::HostTask, double > (HermitianBandMatrix< double > &A, std::vector< double > &D, std::vector< double > &E)
 
template void copyhb2st< Target::HostTask, std::complex< float > > (HermitianBandMatrix< std::complex< float > > &A, std::vector< float > &D, std::vector< float > &E)
 
template void copyhb2st< Target::HostTask, std::complex< double > > (HermitianBandMatrix< std::complex< double > > &A, std::vector< double > &D, std::vector< double > &E)
 
template<typename scalar_t >
void copytb2bd (internal::TargetType< Target::HostTask >, TriangularBandMatrix< scalar_t > A, std::vector< blas::real_type< scalar_t > > &D, std::vector< blas::real_type< scalar_t > > &E)
 Copy bi-diagonal TriangularBand matrix to two vectors.
 
template void copytb2bd< Target::HostTask, float > (TriangularBandMatrix< float > &A, std::vector< float > &D, std::vector< float > &E)
 
template void copytb2bd< Target::HostTask, double > (TriangularBandMatrix< double > &A, std::vector< double > &D, std::vector< double > &E)
 
template void copytb2bd< Target::HostTask, std::complex< float > > (TriangularBandMatrix< std::complex< float > > &A, std::vector< float > &D, std::vector< float > &E)
 
template void copytb2bd< Target::HostTask, std::complex< double > > (TriangularBandMatrix< std::complex< double > > &A, std::vector< double > &D, std::vector< double > &E)
 
template<typename scalar_t >
void norm (internal::TargetType< Target::HostTask >, Norm in_norm, NormScope scope, BandMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 General banded matrix norm.
 
template<typename scalar_t >
void norm (internal::TargetType< Target::HostNest >, Norm in_norm, NormScope scope, BandMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 General banded matrix norm.
 
template<typename scalar_t >
void norm (internal::TargetType< Target::Devices >, Norm in_norm, NormScope scope, BandMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 General banded matrix norm.
 
template void norm< Target::HostTask, float > (Norm in_norm, NormScope scope, BandMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostNest, float > (Norm in_norm, NormScope scope, BandMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::Devices, float > (Norm in_norm, NormScope scope, BandMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostTask, double > (Norm in_norm, NormScope scope, BandMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostNest, double > (Norm in_norm, NormScope scope, BandMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::Devices, double > (Norm in_norm, NormScope scope, BandMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostTask, std::complex< float > > (Norm in_norm, NormScope scope, BandMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostNest, std::complex< float > > (Norm in_norm, NormScope scope, BandMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::Devices, std::complex< float > > (Norm in_norm, NormScope scope, BandMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostTask, std::complex< double > > (Norm in_norm, NormScope scope, BandMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostNest, std::complex< double > > (Norm in_norm, NormScope scope, BandMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::Devices, std::complex< double > > (Norm in_norm, NormScope scope, BandMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template<typename scalar_t >
void add (internal::TargetType< Target::HostTask >, scalar_t alpha, Matrix< scalar_t > &A, scalar_t beta, Matrix< scalar_t > &B, int priority, int queue_index)
 General matrix add.
 
template<typename scalar_t >
void add (internal::TargetType< Target::HostNest >, scalar_t alpha, Matrix< scalar_t > &A, scalar_t beta, Matrix< scalar_t > &B, int priority, int queue_index)
 General matrix add. todo: this function should just be named "add".
 
template<typename scalar_t >
void add (internal::TargetType< Target::HostBatch >, scalar_t alpha, Matrix< scalar_t > &A, scalar_t beta, Matrix< scalar_t > &B, int priority, int queue_index)
 General matrix add. todo: this function should just be named "add".
 
template<typename scalar_t >
void add (internal::TargetType< Target::Devices >, scalar_t alpha, Matrix< scalar_t > &A, scalar_t beta, Matrix< scalar_t > &B, int priority, int queue_index)
 General matrix add.
 
template void add< Target::HostTask, float > (float alpha, Matrix< float > &&A, float beta, Matrix< float > &&B, int priority, int queue_index)
 todo: these functions should just be named "add".
 
template void add< Target::HostNest, float > (float alpha, Matrix< float > &&A, float beta, Matrix< float > &&B, int priority, int queue_index)
 
template void add< Target::HostBatch, float > (float alpha, Matrix< float > &&A, float beta, Matrix< float > &&B, int priority, int queue_index)
 
template void add< Target::Devices, float > (float alpha, Matrix< float > &&A, float beta, Matrix< float > &&B, int priority, int queue_index)
 
template void add< Target::HostTask, double > (double alpha, Matrix< double > &&A, double beta, Matrix< double > &&B, int priority, int queue_index)
 
template void add< Target::HostNest, double > (double alpha, Matrix< double > &&A, double beta, Matrix< double > &&B, int priority, int queue_index)
 
template void add< Target::HostBatch, double > (double alpha, Matrix< double > &&A, double beta, Matrix< double > &&B, int priority, int queue_index)
 
template void add< Target::Devices, double > (double alpha, Matrix< double > &&A, double beta, Matrix< double > &&B, int priority, int queue_index)
 
template void add< Target::HostTask, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, std::complex< float > beta, Matrix< std::complex< float > > &&B, int priority, int queue_index)
 
template void add< Target::HostNest, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, std::complex< float > beta, Matrix< std::complex< float > > &&B, int priority, int queue_index)
 
template void add< Target::HostBatch, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, std::complex< float > beta, Matrix< std::complex< float > > &&B, int priority, int queue_index)
 
template void add< Target::Devices, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, std::complex< float > beta, Matrix< std::complex< float > > &&B, int priority, int queue_index)
 
template void add< Target::HostTask, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, std::complex< double > beta, Matrix< std::complex< double > > &&B, int priority, int queue_index)
 
template void add< Target::HostNest, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, std::complex< double > beta, Matrix< std::complex< double > > &&B, int priority, int queue_index)
 
template void add< Target::HostBatch, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, std::complex< double > beta, Matrix< std::complex< double > > &&B, int priority, int queue_index)
 
template void add< Target::Devices, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, std::complex< double > beta, Matrix< std::complex< double > > &&B, int priority, int queue_index)
 
template<typename scalar_t >
void gerfg (Matrix< scalar_t > &A, int64_t n, scalar_t *v)
 Generates a Householder reflector \(H = I - \tau v v^H\) using the first column of the matrix \(A\), i.e., a reflector that zeroes \(A[1:m-1, 0]\).
 
template<typename scalar_t >
void gerf (int64_t n, scalar_t *v, Matrix< scalar_t > &A)
 Applies a Householder reflector \(H = I - \tau v v^H\) to the matrix \(A\) from the left.
 
template<typename scalar_t >
void gebr1 (internal::TargetType< Target::HostTask >, Matrix< scalar_t > &A, int64_t n1, scalar_t *v1, int64_t n2, scalar_t *v2, int priority)
 Implements task 1 in the bidiagonal bulge chasing algorithm.
 
template<typename scalar_t >
void gebr2 (internal::TargetType< Target::HostTask >, int64_t n1, scalar_t *v1, Matrix< scalar_t > &A, int64_t n2, scalar_t *v2, int priority)
 Implements task 2 in the bidiagonal bulge chasing algorithm.
 
template<typename scalar_t >
void gebr3 (internal::TargetType< Target::HostTask >, int64_t n1, scalar_t *v1, Matrix< scalar_t > &A, int64_t n2, scalar_t *v2, int priority)
 Implements task 3 in the bidiagonal bulge chasing algorithm.
 
template void gerfg (Matrix< float > &A, int64_t n, float *v)
 
template void gerfg (Matrix< double > &A, int64_t n, double *v)
 
template void gerfg (Matrix< std::complex< float > > &A, int64_t n, std::complex< float > *v)
 
template void gerfg (Matrix< std::complex< double > > &A, int64_t n, std::complex< double > *v)
 
template void gerf (int64_t n, float *v, Matrix< float > &A)
 
template void gerf (int64_t n, double *v, Matrix< double > &A)
 
template void gerf (int64_t n, std::complex< float > *v, Matrix< std::complex< float > > &A)
 
template void gerf (int64_t n, std::complex< double > *v, Matrix< std::complex< double > > &A)
 
template void gebr1< Target::HostTask, float > (Matrix< float > &&A, int64_t n1, float *v1, int64_t n2, float *v2, int priority)
 
template void gebr1< Target::HostTask, double > (Matrix< double > &&A, int64_t n1, double *v1, int64_t n2, double *v2, int priority)
 
template void gebr1< Target::HostTask, std::complex< float > > (Matrix< std::complex< float > > &&A, int64_t n1, std::complex< float > *v1, int64_t n2, std::complex< float > *v2, int priority)
 
template void gebr1< Target::HostTask, std::complex< double > > (Matrix< std::complex< double > > &&A, int64_t n1, std::complex< double > *v1, int64_t n2, std::complex< double > *v2, int priority)
 
template void gebr2< Target::HostTask, float > (int64_t n1, float *v1, Matrix< float > &&A, int64_t n2, float *v2, int priority)
 
template void gebr2< Target::HostTask, double > (int64_t n1, double *v1, Matrix< double > &&A, int64_t n2, double *v2, int priority)
 
template void gebr2< Target::HostTask, std::complex< float > > (int64_t n1, std::complex< float > *v1, Matrix< std::complex< float > > &&A, int64_t n2, std::complex< float > *v2, int priority)
 
template void gebr2< Target::HostTask, std::complex< double > > (int64_t n1, std::complex< double > *v1, Matrix< std::complex< double > > &&A, int64_t n2, std::complex< double > *v2, int priority)
 
template void gebr3< Target::HostTask, float > (int64_t n1, float *v1, Matrix< float > &&A, int64_t n2, float *v2, int priority)
 
template void gebr3< Target::HostTask, double > (int64_t n1, double *v1, Matrix< double > &&A, int64_t n2, double *v2, int priority)
 
template void gebr3< Target::HostTask, std::complex< float > > (int64_t n1, std::complex< float > *v1, Matrix< std::complex< float > > &&A, int64_t n2, std::complex< float > *v2, int priority)
 
template void gebr3< Target::HostTask, std::complex< double > > (int64_t n1, std::complex< double > *v1, Matrix< std::complex< double > > &&A, int64_t n2, std::complex< double > *v2, int priority)
 
template<typename src_scalar_t , typename dst_scalar_t >
void copy (internal::TargetType< Target::HostTask >, Matrix< src_scalar_t > &A, Matrix< dst_scalar_t > &B, int priority, int queue_index)
 Copy and precision conversion.
 
template<typename src_scalar_t , typename dst_scalar_t >
void copy (internal::TargetType< Target::HostNest >, Matrix< src_scalar_t > &A, Matrix< dst_scalar_t > &B, int priority, int queue_index)
 
template<typename src_scalar_t , typename dst_scalar_t >
void copy (internal::TargetType< Target::HostBatch >, Matrix< src_scalar_t > &A, Matrix< dst_scalar_t > &B, int priority, int queue_index)
 
template<typename src_scalar_t , typename dst_scalar_t >
void copy (internal::TargetType< Target::Devices >, Matrix< src_scalar_t > &A, Matrix< dst_scalar_t > &B, int priority, int queue_index)
 Copy and precision conversion.
 
template void copy< Target::HostTask, float, float > (Matrix< float > &&A, Matrix< float > &&B, int priority, int queue_index)
 
template void copy< Target::HostNest, float, float > (Matrix< float > &&A, Matrix< float > &&B, int priority, int queue_index)
 
template void copy< Target::HostBatch, float, float > (Matrix< float > &&A, Matrix< float > &&B, int priority, int queue_index)
 
template void copy< Target::Devices, float, float > (Matrix< float > &&A, Matrix< float > &&B, int priority, int queue_index)
 
template void copy< Target::HostTask, float, double > (Matrix< float > &&A, Matrix< double > &&B, int priority, int queue_index)
 
template void copy< Target::HostNest, float, double > (Matrix< float > &&A, Matrix< double > &&B, int priority, int queue_index)
 
template void copy< Target::HostBatch, float, double > (Matrix< float > &&A, Matrix< double > &&B, int priority, int queue_index)
 
template void copy< Target::Devices, float, double > (Matrix< float > &&A, Matrix< double > &&B, int priority, int queue_index)
 
template void copy< Target::HostTask, double, double > (Matrix< double > &&A, Matrix< double > &&B, int priority, int queue_index)
 
template void copy< Target::HostNest, double, double > (Matrix< double > &&A, Matrix< double > &&B, int priority, int queue_index)
 
template void copy< Target::HostBatch, double, double > (Matrix< double > &&A, Matrix< double > &&B, int priority, int queue_index)
 
template void copy< Target::Devices, double, double > (Matrix< double > &&A, Matrix< double > &&B, int priority, int queue_index)
 
template void copy< Target::HostTask, double, float > (Matrix< double > &&A, Matrix< float > &&B, int priority, int queue_index)
 
template void copy< Target::HostNest, double, float > (Matrix< double > &&A, Matrix< float > &&B, int priority, int queue_index)
 
template void copy< Target::HostBatch, double, float > (Matrix< double > &&A, Matrix< float > &&B, int priority, int queue_index)
 
template void copy< Target::Devices, double, float > (Matrix< double > &&A, Matrix< float > &&B, int priority, int queue_index)
 
template void copy< Target::HostTask, float, std::complex< float > > (Matrix< float > &&A, Matrix< std::complex< float > > &&B, int priority, int queue_index)
 
template void copy< Target::Devices, float, std::complex< float > > (Matrix< float > &&A, Matrix< std::complex< float > > &&B, int priority, int queue_index)
 
template void copy< Target::HostNest, float, std::complex< float > > (Matrix< float > &&A, Matrix< std::complex< float > > &&B, int priority, int queue_index)
 
template void copy< Target::HostBatch, float, std::complex< float > > (Matrix< float > &&A, Matrix< std::complex< float > > &&B, int priority, int queue_index)
 
template void copy< Target::HostTask, double, std::complex< double > > (Matrix< double > &&A, Matrix< std::complex< double > > &&B, int priority, int queue_index)
 
template void copy< Target::Devices, double, std::complex< double > > (Matrix< double > &&A, Matrix< std::complex< double > > &&B, int priority, int queue_index)
 
template void copy< Target::HostNest, double, std::complex< double > > (Matrix< double > &&A, Matrix< std::complex< double > > &&B, int priority, int queue_index)
 
template void copy< Target::HostBatch, double, std::complex< double > > (Matrix< double > &&A, Matrix< std::complex< double > > &&B, int priority, int queue_index)
 
template<typename scalar_t >
void gemm (internal::TargetType< Target::HostTask >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Layout layout, int priority, int64_t queue_index)
 General matrix multiply to update trailing matrix, where A is a single block column and B is a single block row.
 
template<typename scalar_t >
void gemm (internal::TargetType< Target::HostNest >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Layout layout, int priority, int64_t queue_index)
 General matrix multiply to update trailing matrix, where A is a single block column and B is a single block row.
 
template<typename scalar_t >
void gemm (internal::TargetType< Target::HostBatch >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Layout layout, int priority, int64_t queue_index)
 General matrix multiply to update trailing matrix, where A is a single block column (mt tiles by 1 tile), B is a single block row (1 tile by nt tiles), and C is mt tiles by nt tiles.
 
template<typename scalar_t >
void gemm (internal::TargetType< Target::Devices >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Layout layout, int priority, int64_t queue_index)
 General matrix multiply to update trailing matrix, where A is a single block column and B is a single block row.
 
template void gemm< Target::HostTask, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::HostNest, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::HostBatch, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::Devices, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::HostTask, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::HostNest, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::HostBatch, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::Devices, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::HostTask, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::HostNest, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::HostBatch, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::Devices, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::HostTask, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::HostNest, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::HostBatch, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemm< Target::Devices, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, Layout layout, int priority, int64_t queue_index)
 
template<typename scalar_t >
void gemmA (internal::TargetType< Target::HostTask >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Layout layout, int priority, int queue_index)
 General matrix multiply for a left-looking update.
 
template<typename scalar_t >
void gemmA (internal::TargetType< Target::HostNest >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Layout layout, int priority, int queue_index)
 
template<typename scalar_t >
void gemmA (internal::TargetType< Target::HostBatch >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Layout layout, int priority, int queue_index)
 
template<typename scalar_t >
void gemmA (internal::TargetType< Target::Devices >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Layout layout, int priority, int64_t queue_index)
 General matrix multiply for a left-looking update. GPU device batched-BLAS implementation. (TODO: complete this description.)
 
template void gemmA< Target::HostTask, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::HostTask, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::HostTask, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::HostTask, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::HostNest, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::HostNest, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::HostNest, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::HostNest, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::HostBatch, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::HostBatch, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::HostBatch, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::HostBatch, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::Devices, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::Devices, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::Devices, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, Layout layout, int priority, int64_t queue_index)
 
template void gemmA< Target::Devices, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, Layout layout, int priority, int64_t queue_index)
 
template<typename scalar_t >
void norm (internal::TargetType< Target::HostTask >, Norm in_norm, NormScope scope, Matrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 General matrix norm.
 
template<typename scalar_t >
void norm (internal::TargetType< Target::HostNest >, Norm in_norm, NormScope scope, Matrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 General matrix norm.
 
template<typename scalar_t >
void norm (internal::TargetType< Target::Devices >, Norm in_norm, NormScope scope, Matrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 General matrix norm.
 
template void norm< Target::HostTask, float > (Norm in_norm, NormScope scope, Matrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostNest, float > (Norm in_norm, NormScope scope, Matrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::Devices, float > (Norm in_norm, NormScope scope, Matrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostTask, double > (Norm in_norm, NormScope scope, Matrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostNest, double > (Norm in_norm, NormScope scope, Matrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::Devices, double > (Norm in_norm, NormScope scope, Matrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostTask, std::complex< float > > (Norm in_norm, NormScope scope, Matrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostNest, std::complex< float > > (Norm in_norm, NormScope scope, Matrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::Devices, std::complex< float > > (Norm in_norm, NormScope scope, Matrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostTask, std::complex< double > > (Norm in_norm, NormScope scope, Matrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostNest, std::complex< double > > (Norm in_norm, NormScope scope, Matrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::Devices, std::complex< double > > (Norm in_norm, NormScope scope, Matrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template<typename scalar_t >
void geqrf (internal::TargetType< Target::HostTask >, Matrix< scalar_t > &A, Matrix< scalar_t > &T, std::vector< scalar_t * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 QR factorization of a column of tiles, HostTask implementation.
 
template<typename scalar_t >
void geqrf (internal::TargetType< Target::HostNest >, Matrix< scalar_t > &A, Matrix< scalar_t > &T, std::vector< scalar_t * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 QR factorization of a column of tiles, HostNest implementation.
 
template<typename scalar_t >
void geqrf (internal::TargetType< Target::HostBatch >, Matrix< scalar_t > &A, Matrix< scalar_t > &T, std::vector< scalar_t * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 QR factorization of a column of tiles, HostBatch implementation.
 
template<typename scalar_t >
void geqrf (internal::TargetType< Target::Devices >, Matrix< scalar_t > &A, Matrix< scalar_t > &T, std::vector< scalar_t * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 QR factorization of a column of tiles, device implementation.
 
template void geqrf< Target::HostTask, float > (Matrix< float > &&A, Matrix< float > &&T, std::vector< float * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::HostTask, double > (Matrix< double > &&A, Matrix< double > &&T, std::vector< double * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::HostTask, std::complex< float > > (Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&T, std::vector< std::complex< float > * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::HostTask, std::complex< double > > (Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&T, std::vector< std::complex< double > * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::HostNest, float > (Matrix< float > &&A, Matrix< float > &&T, std::vector< float * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::HostNest, double > (Matrix< double > &&A, Matrix< double > &&T, std::vector< double * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::HostNest, std::complex< float > > (Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&T, std::vector< std::complex< float > * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::HostNest, std::complex< double > > (Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&T, std::vector< std::complex< double > * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::HostBatch, float > (Matrix< float > &&A, Matrix< float > &&T, std::vector< float * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::HostBatch, double > (Matrix< double > &&A, Matrix< double > &&T, std::vector< double * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::HostBatch, std::complex< float > > (Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&T, std::vector< std::complex< float > * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::HostBatch, std::complex< double > > (Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&T, std::vector< std::complex< double > * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::Devices, float > (Matrix< float > &&A, Matrix< float > &&T, std::vector< float * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::Devices, double > (Matrix< double > &&A, Matrix< double > &&T, std::vector< double * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::Devices, std::complex< float > > (Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&T, std::vector< std::complex< float > * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void geqrf< Target::Devices, std::complex< double > > (Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&T, std::vector< std::complex< double > * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 
template void gerbt (Matrix< float >, Matrix< float >, Matrix< float >, Matrix< float >, Matrix< float >, Matrix< float >, Matrix< float >, Matrix< float >)
 
template void gerbt (Matrix< double >, Matrix< double >, Matrix< double >, Matrix< double >, Matrix< double >, Matrix< double >, Matrix< double >, Matrix< double >)
 
template void gerbt (Matrix< std::complex< float > >, Matrix< std::complex< float > >, Matrix< std::complex< float > >, Matrix< std::complex< float > >, Matrix< std::complex< float > >, Matrix< std::complex< float > >, Matrix< std::complex< float > >, Matrix< std::complex< float > >)
 
template void gerbt (Matrix< std::complex< double > >, Matrix< std::complex< double > >, Matrix< std::complex< double > >, Matrix< std::complex< double > >, Matrix< std::complex< double > >, Matrix< std::complex< double > >, Matrix< std::complex< double > >, Matrix< std::complex< double > >)
 
template void gerbt (Side, Op, Matrix< float >, Matrix< float >, Matrix< float >, Matrix< float >)
 
template void gerbt (Side, Op, Matrix< double >, Matrix< double >, Matrix< double >, Matrix< double >)
 
template void gerbt (Side, Op, Matrix< std::complex< float > >, Matrix< std::complex< float > >, Matrix< std::complex< float > >, Matrix< std::complex< float > >)
 
template void gerbt (Side, Op, Matrix< std::complex< double > >, Matrix< std::complex< double > >, Matrix< std::complex< double > >, Matrix< std::complex< double > >)
 
template<typename scalar_t >
void scale (internal::TargetType< Target::HostTask >, blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, Matrix< scalar_t > &A, int priority, int queue_index)
 Scale matrix entries by the real scalar numer/denom.
 
template<typename scalar_t >
void scale (internal::TargetType< Target::HostNest >, blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, Matrix< scalar_t > &A, int priority, int queue_index)
 
template<typename scalar_t >
void scale (internal::TargetType< Target::HostBatch >, blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, Matrix< scalar_t > &A, int priority, int queue_index)
 
template<typename scalar_t >
void scale (internal::TargetType< Target::Devices >, blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, Matrix< scalar_t > &A, int priority, int queue_index)
 Scale matrix entries by the real scalar numer/denom.
 
template void scale< Target::HostTask, float > (float numer, float denom, Matrix< float > &&A, int priority, int queue_index)
 
template void scale< Target::HostNest, float > (float numer, float denom, Matrix< float > &&A, int priority, int queue_index)
 
template void scale< Target::HostBatch, float > (float numer, float denom, Matrix< float > &&A, int priority, int queue_index)
 
template void scale< Target::Devices, float > (float numer, float denom, Matrix< float > &&A, int priority, int queue_index)
 
template void scale< Target::HostTask, double > (double numer, double denom, Matrix< double > &&A, int priority, int queue_index)
 
template void scale< Target::HostNest, double > (double numer, double denom, Matrix< double > &&A, int priority, int queue_index)
 
template void scale< Target::HostBatch, double > (double numer, double denom, Matrix< double > &&A, int priority, int queue_index)
 
template void scale< Target::Devices, double > (double numer, double denom, Matrix< double > &&A, int priority, int queue_index)
 
template void scale< Target::HostTask, std::complex< float > > (float numer, float denom, Matrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void scale< Target::HostNest, std::complex< float > > (float numer, float denom, Matrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void scale< Target::HostBatch, std::complex< float > > (float numer, float denom, Matrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void scale< Target::Devices, std::complex< float > > (float numer, float denom, Matrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void scale< Target::HostTask, std::complex< double > > (double numer, double denom, Matrix< std::complex< double > > &&A, int priority, int queue_index)
 
template void scale< Target::HostNest, std::complex< double > > (double numer, double denom, Matrix< std::complex< double > > &&A, int priority, int queue_index)
 
template void scale< Target::HostBatch, std::complex< double > > (double numer, double denom, Matrix< std::complex< double > > &&A, int priority, int queue_index)
 
template void scale< Target::Devices, std::complex< double > > (double numer, double denom, Matrix< std::complex< double > > &&A, int priority, int queue_index)
 
template<typename scalar_t , typename scalar_t2 >
void scale_row_col (TargetType< Target::HostTask >, Equed equed, std::vector< scalar_t2 > const &R, std::vector< scalar_t2 > const &C, Matrix< scalar_t > &A)
 Apply row or column scaling, or both, to a Matrix.
 
template<typename scalar_t , typename scalar_t2 >
void scale_row_col (TargetType< Target::Devices >, Equed equed, std::vector< scalar_t2 > const &R, std::vector< scalar_t2 > const &C, Matrix< scalar_t > &A)
 Apply row or column scaling, or both, to a Matrix.
 
template void scale_row_col< Target::HostTask, float > (Equed equed, std::vector< float > const &R, std::vector< float > const &C, Matrix< float > &&A)
 
template void scale_row_col< Target::Devices, float > (Equed equed, std::vector< float > const &R, std::vector< float > const &C, Matrix< float > &&A)
 
template void scale_row_col< Target::HostTask, double > (Equed equed, std::vector< double > const &R, std::vector< double > const &C, Matrix< double > &&A)
 
template void scale_row_col< Target::Devices, double > (Equed equed, std::vector< double > const &R, std::vector< double > const &C, Matrix< double > &&A)
 
template void scale_row_col< Target::HostTask, std::complex< float > > (Equed equed, std::vector< float > const &R, std::vector< float > const &C, Matrix< std::complex< float > > &&A)
 
template void scale_row_col< Target::Devices, std::complex< float > > (Equed equed, std::vector< float > const &R, std::vector< float > const &C, Matrix< std::complex< float > > &&A)
 
template void scale_row_col< Target::HostTask, std::complex< double > > (Equed equed, std::vector< double > const &R, std::vector< double > const &C, Matrix< std::complex< double > > &&A)
 
template void scale_row_col< Target::Devices, std::complex< double > > (Equed equed, std::vector< double > const &R, std::vector< double > const &C, Matrix< std::complex< double > > &&A)
 
template void scale_row_col< Target::HostTask, std::complex< float > > (Equed equed, std::vector< std::complex< float > > const &R, std::vector< std::complex< float > > const &C, Matrix< std::complex< float > > &&A)
 
template void scale_row_col< Target::Devices, std::complex< float > > (Equed equed, std::vector< std::complex< float > > const &R, std::vector< std::complex< float > > const &C, Matrix< std::complex< float > > &&A)
 
template void scale_row_col< Target::HostTask, std::complex< double > > (Equed equed, std::vector< std::complex< double > > const &R, std::vector< std::complex< double > > const &C, Matrix< std::complex< double > > &&A)
 
template void scale_row_col< Target::Devices, std::complex< double > > (Equed equed, std::vector< std::complex< double > > const &R, std::vector< std::complex< double > > const &C, Matrix< std::complex< double > > &&A)
 
template<typename scalar_t >
void set (internal::TargetType< Target::HostTask >, scalar_t offdiag_value, scalar_t diag_value, Matrix< scalar_t > &A, int priority, int queue_index)
 General matrix set.
 
template<typename scalar_t >
void set (internal::TargetType< Target::HostNest >, scalar_t offdiag_value, scalar_t diag_value, Matrix< scalar_t > &A, int priority, int queue_index)
 
template<typename scalar_t >
void set (internal::TargetType< Target::HostBatch >, scalar_t offdiag_value, scalar_t diag_value, Matrix< scalar_t > &A, int priority, int queue_index)
 
template<typename scalar_t >
void set (internal::TargetType< Target::Devices >, scalar_t offdiag_value, scalar_t diag_value, Matrix< scalar_t > &A, int priority, int queue_index)
 General matrix set.
 
template void set< Target::HostTask, float > (float offdiag_value, float diag_value, Matrix< float > &&A, int priority, int queue_index)
 
template void set< Target::HostNest, float > (float offdiag_value, float diag_value, Matrix< float > &&A, int priority, int queue_index)
 
template void set< Target::HostBatch, float > (float offdiag_value, float diag_value, Matrix< float > &&A, int priority, int queue_index)
 
template void set< Target::Devices, float > (float offdiag_value, float diag_value, Matrix< float > &&A, int priority, int queue_index)
 
template void set< Target::HostTask, double > (double offdiag_value, double diag_value, Matrix< double > &&A, int priority, int queue_index)
 
template void set< Target::HostNest, double > (double offdiag_value, double diag_value, Matrix< double > &&A, int priority, int queue_index)
 
template void set< Target::HostBatch, double > (double offdiag_value, double diag_value, Matrix< double > &&A, int priority, int queue_index)
 
template void set< Target::Devices, double > (double offdiag_value, double diag_value, Matrix< double > &&A, int priority, int queue_index)
 
template void set< Target::HostTask, std::complex< float > > (std::complex< float > offdiag_value, std::complex< float > diag_value, Matrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void set< Target::HostNest, std::complex< float > > (std::complex< float > offdiag_value, std::complex< float > diag_value, Matrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void set< Target::HostBatch, std::complex< float > > (std::complex< float > offdiag_value, std::complex< float > diag_value, Matrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void set< Target::Devices, std::complex< float > > (std::complex< float > offdiag_value, std::complex< float > diag_value, Matrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void set< Target::HostTask, std::complex< double > > (std::complex< double > offdiag_value, std::complex< double > diag_value, Matrix< std::complex< double > > &&A, int priority, int queue_index)
 
template void set< Target::HostNest, std::complex< double > > (std::complex< double > offdiag_value, std::complex< double > diag_value, Matrix< std::complex< double > > &&A, int priority, int queue_index)
 
template void set< Target::HostBatch, std::complex< double > > (std::complex< double > offdiag_value, std::complex< double > diag_value, Matrix< std::complex< double > > &&A, int priority, int queue_index)
 
template void set< Target::Devices, std::complex< double > > (std::complex< double > offdiag_value, std::complex< double > diag_value, Matrix< std::complex< double > > &&A, int priority, int queue_index)
 
template<typename scalar_t >
void getrf_panel (internal::TargetType< Target::HostTask >, Matrix< scalar_t > &A, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, blas::real_type< scalar_t > pivot_threshold, int max_panel_threads, int priority, int tag, int64_t *info)
 LU factorization of a column of tiles, host implementation.
 
template void getrf_panel< Target::HostTask, float > (Matrix< float > &&A, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, float pivot_threshold, int max_panel_threads, int priority, int tag, int64_t *info)
 
template void getrf_panel< Target::HostTask, double > (Matrix< double > &&A, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, double pivot_threshold, int max_panel_threads, int priority, int tag, int64_t *info)
 
template void getrf_panel< Target::HostTask, std::complex< float > > (Matrix< std::complex< float > > &&A, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, float pivot_threshold, int max_panel_threads, int priority, int tag, int64_t *info)
 
template void getrf_panel< Target::HostTask, std::complex< double > > (Matrix< std::complex< double > > &&A, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, double pivot_threshold, int max_panel_threads, int priority, int tag, int64_t *info)
 
template<typename scalar_t >
void getrf_nopiv (internal::TargetType< Target::HostTask >, Matrix< scalar_t > &A, int64_t ib, int priority, int64_t *info)
 LU factorization of a single tile without pivoting, host implementation.
 
template void getrf_nopiv< Target::HostTask, float > (Matrix< float > &&A, int64_t ib, int priority, int64_t *info)
 
template void getrf_nopiv< Target::HostTask, double > (Matrix< double > &&A, int64_t ib, int priority, int64_t *info)
 
template void getrf_nopiv< Target::HostTask, std::complex< float > > (Matrix< std::complex< float > > &&A, int64_t ib, int priority, int64_t *info)
 
template void getrf_nopiv< Target::HostTask, std::complex< double > > (Matrix< std::complex< double > > &&A, int64_t ib, int priority, int64_t *info)
 
template<typename scalar_t >
void permutation_to_sequential_pivot (std::vector< AuxPivot< scalar_t > > &aux_pivot, int64_t diag_len, int mt, int64_t mb)
 Convert pivot rows (i.e., permutation of 0, ..., m-1) to sequence of row-swaps to be applied to a matrix (i.e., LAPACK-style sequential pivots), for m = mt * mb rows.
 
template<typename scalar_t >
void getrf_tntpiv_local (internal::TargetType< Target::HostTask >, std::vector< Tile< scalar_t > > &tiles, std::vector< char * > dwork_array, size_t dwork_bytes, int mlocal, int device, lapack::Queue *queue, int64_t diag_len, int64_t ib, int stage, int64_t mb, int64_t nb, std::vector< int64_t > &tile_indices, std::vector< std::vector< AuxPivot< scalar_t > > > &aux_pivot, int mpi_rank, int max_panel_threads, int priority, int64_t *info)
 Multi-threaded LU factorization of local tiles.
 
template<typename scalar_t >
void getrf_tntpiv_local (internal::TargetType< Target::HostBatch >, std::vector< Tile< scalar_t > > &tiles, std::vector< char * > dwork_array, size_t dwork_bytes, int mlocal, int device, lapack::Queue *queue, int64_t diag_len, int64_t ib, int stage, int64_t mb, int64_t nb, std::vector< int64_t > &tile_indices, std::vector< std::vector< AuxPivot< scalar_t > > > &aux_pivot, int mpi_rank, int max_panel_threads, int priority, int64_t *info)
 Forward HostBatch => HostTask.
 
template<typename scalar_t >
void getrf_tntpiv_local (internal::TargetType< Target::HostNest >, std::vector< Tile< scalar_t > > &tiles, std::vector< char * > dwork_array, size_t dwork_bytes, int mlocal, int device, lapack::Queue *queue, int64_t diag_len, int64_t ib, int stage, int64_t mb, int64_t nb, std::vector< int64_t > &tile_indices, std::vector< std::vector< AuxPivot< scalar_t > > > &aux_pivot, int mpi_rank, int max_panel_threads, int priority, int64_t *info)
 Forward HostNest => HostTask.
 
template<typename scalar_t >
void getrf_tntpiv_local (internal::TargetType< Target::Devices >, std::vector< Tile< scalar_t > > &tiles, std::vector< char * > dwork_array, size_t dwork_bytes, int mlocal, int device, lapack::Queue *queue, int64_t diag_len, int64_t ib, int stage, int64_t mb, int64_t nb, std::vector< int64_t > &tile_indices, std::vector< std::vector< AuxPivot< scalar_t > > > &aux_pivot, int mpi_rank, int max_panel_threads, int priority, int64_t *info)
 
template<Target target, typename scalar_t >
void getrf_tntpiv_panel (internal::TargetType< target >, Matrix< scalar_t > &A, Matrix< scalar_t > &Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 LU factorization of a column of tiles.
 
template void getrf_tntpiv_panel< Target::HostTask, float > (Matrix< float > &&A, Matrix< float > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::HostTask, double > (Matrix< double > &&A, Matrix< double > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::HostTask, std::complex< float > > (Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::HostTask, std::complex< double > > (Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::HostNest, float > (Matrix< float > &&A, Matrix< float > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::HostNest, double > (Matrix< double > &&A, Matrix< double > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::HostNest, std::complex< float > > (Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::HostNest, std::complex< double > > (Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::HostBatch, float > (Matrix< float > &&A, Matrix< float > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::HostBatch, double > (Matrix< double > &&A, Matrix< double > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::HostBatch, std::complex< float > > (Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::HostBatch, std::complex< double > > (Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::Devices, float > (Matrix< float > &&A, Matrix< float > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::Devices, double > (Matrix< double > &&A, Matrix< double > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::Devices, std::complex< float > > (Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template void getrf_tntpiv_panel< Target::Devices, std::complex< double > > (Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 
template<typename scalar_t >
void norm (internal::TargetType< Target::HostTask >, Norm in_norm, NormScope scope, HermitianBandMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Hermitian banded matrix norm.
 
template<typename scalar_t >
void norm (internal::TargetType< Target::HostNest >, Norm in_norm, NormScope scope, HermitianBandMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Hermitian banded matrix norm.
 
template<typename scalar_t >
void norm (internal::TargetType< Target::Devices >, Norm in_norm, NormScope scope, HermitianBandMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Hermitian banded matrix norm.
 
template void norm< Target::HostTask, float > (Norm in_norm, NormScope scope, HermitianBandMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostNest, float > (Norm in_norm, NormScope scope, HermitianBandMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::Devices, float > (Norm in_norm, NormScope scope, HermitianBandMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostTask, double > (Norm in_norm, NormScope scope, HermitianBandMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostNest, double > (Norm in_norm, NormScope scope, HermitianBandMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::Devices, double > (Norm in_norm, NormScope scope, HermitianBandMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostTask, std::complex< float > > (Norm in_norm, NormScope scope, HermitianBandMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostNest, std::complex< float > > (Norm in_norm, NormScope scope, HermitianBandMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::Devices, std::complex< float > > (Norm in_norm, NormScope scope, HermitianBandMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostTask, std::complex< double > > (Norm in_norm, NormScope scope, HermitianBandMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostNest, std::complex< double > > (Norm in_norm, NormScope scope, HermitianBandMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::Devices, std::complex< double > > (Norm in_norm, NormScope scope, HermitianBandMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template<typename scalar_t >
void he2hb_gemm (internal::TargetType< Target::HostTask >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, int panel_rank, int priority, int64_t queue_index)
 Inner product C = AB, Host OpenMP task implementation.
 
template<typename scalar_t >
void he2hb_gemm (internal::TargetType< Target::Devices >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, int panel_rank, int priority, int64_t queue_index)
 Inner product C = AB, Device implementation.
 
template void he2hb_gemm< Target::HostTask, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, int panel_rank, int priority, int64_t queue_index)
 
template void he2hb_gemm< Target::HostTask, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, int panel_rank, int priority, int64_t queue_index)
 
template void he2hb_gemm< Target::HostTask, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, int panel_rank, int priority, int64_t queue_index)
 
template void he2hb_gemm< Target::HostTask, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, int panel_rank, int priority, int64_t queue_index)
 
template void he2hb_gemm< Target::Devices, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, int panel_rank, int priority, int64_t queue_index)
 
template void he2hb_gemm< Target::Devices, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, int panel_rank, int priority, int64_t queue_index)
 
template void he2hb_gemm< Target::Devices, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, int panel_rank, int priority, int64_t queue_index)
 
template void he2hb_gemm< Target::Devices, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, int panel_rank, int priority, int64_t queue_index)
 
template<typename scalar_t >
void he2hb_hemm (internal::TargetType< Target::HostTask >, HermitianMatrix< scalar_t > &A, Matrix< scalar_t > &B, Matrix< scalar_t > &C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 Apply local reflectors.
 
template<typename scalar_t >
void he2hb_hemm (internal::TargetType< Target::Devices >, HermitianMatrix< scalar_t > &A, Matrix< scalar_t > &B, Matrix< scalar_t > &C, std::vector< int64_t > panel_rank_rows, int priority, int64_t queue_index)
 Apply local reflectors.
 
template void he2hb_hemm< Target::HostTask, float > (HermitianMatrix< float > &&A, Matrix< float > &&B, Matrix< float > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_hemm< Target::HostTask, double > (HermitianMatrix< double > &&A, Matrix< double > &&B, Matrix< double > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_hemm< Target::HostTask, std::complex< float > > (HermitianMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, Matrix< std::complex< float > > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_hemm< Target::HostTask, std::complex< double > > (HermitianMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, Matrix< std::complex< double > > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_hemm< Target::Devices, float > (HermitianMatrix< float > &&A, Matrix< float > &&B, Matrix< float > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_hemm< Target::Devices, double > (HermitianMatrix< double > &&A, Matrix< double > &&B, Matrix< double > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_hemm< Target::Devices, std::complex< float > > (HermitianMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, Matrix< std::complex< float > > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_hemm< Target::Devices, std::complex< double > > (HermitianMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, Matrix< std::complex< double > > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template<typename scalar_t >
void he2hb_her2k_offdiag_ranks (internal::TargetType< Target::HostTask >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, HermitianMatrix< scalar_t > &C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 Matrix multiply to update trailing matrix, except the diagonal tiles.
 
template<typename scalar_t >
void he2hb_her2k_offdiag_ranks (internal::TargetType< Target::Devices >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, HermitianMatrix< scalar_t > &C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 Matrix multiply to update trailing matrix, except the diagonal tiles.
 
template void he2hb_her2k_offdiag_ranks< Target::HostTask, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, HermitianMatrix< float > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_her2k_offdiag_ranks< Target::HostTask, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, HermitianMatrix< double > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_her2k_offdiag_ranks< Target::HostTask, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, HermitianMatrix< std::complex< float > > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_her2k_offdiag_ranks< Target::HostTask, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, HermitianMatrix< std::complex< double > > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_her2k_offdiag_ranks< Target::Devices, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, HermitianMatrix< float > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_her2k_offdiag_ranks< Target::Devices, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, HermitianMatrix< double > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_her2k_offdiag_ranks< Target::Devices, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, HermitianMatrix< std::complex< float > > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_her2k_offdiag_ranks< Target::Devices, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, HermitianMatrix< std::complex< double > > &&C, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template<typename scalar_t >
bool need_Bi0 (HermitianMatrix< scalar_t > AH, int mpi_rank, int64_t i, std::vector< int64_t > &panel_rank_rows)
 Determines whether this process contributes to B(i, 0).
 
template<typename scalar_t >
void he2hb_trmm (internal::TargetType< Target::HostTask >, HermitianMatrix< scalar_t > &AH, Matrix< scalar_t > &A, Matrix< scalar_t > &B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<typename scalar_t >
void he2hb_trmm (internal::TargetType< Target::Devices >, HermitianMatrix< scalar_t > &AH, Matrix< scalar_t > &A, Matrix< scalar_t > &B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template void he2hb_trmm< Target::HostTask, float > (HermitianMatrix< float > &&AH, Matrix< float > &&A, Matrix< float > &&B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_trmm< Target::HostTask, double > (HermitianMatrix< double > &&AH, Matrix< double > &&A, Matrix< double > &&B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_trmm< Target::HostTask, std::complex< float > > (HermitianMatrix< std::complex< float > > &&AH, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_trmm< Target::HostTask, std::complex< double > > (HermitianMatrix< std::complex< double > > &&AH, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_trmm< Target::Devices, float > (HermitianMatrix< float > &&AH, Matrix< float > &&A, Matrix< float > &&B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_trmm< Target::Devices, double > (HermitianMatrix< double > &&AH, Matrix< double > &&A, Matrix< double > &&B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_trmm< Target::Devices, std::complex< float > > (HermitianMatrix< std::complex< float > > &&AH, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template void he2hb_trmm< Target::Devices, std::complex< double > > (HermitianMatrix< std::complex< double > > &&AH, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::vector< int64_t > &panel_rank_rows, int priority, int64_t queue_index)
 
template<typename scalar_t >
void herf (int64_t n, scalar_t *v, HermitianMatrix< scalar_t > &A)
 Applies a Householder reflector \(H = I - \tau v v^H\) to the Hermitian matrix \(A\) on the left and right.
 
template<typename scalar_t >
void hebr1 (internal::TargetType< Target::HostTask >, int64_t n, scalar_t *v, HermitianMatrix< scalar_t > &A, int priority)
 Implements task type 1 in the tridiagonal bulge chasing algorithm, bringing the first column & row of A to tridiagonal form.
 
template<typename scalar_t >
void hebr2 (internal::TargetType< Target::HostTask >, int64_t n1, scalar_t *v1, int64_t n2, scalar_t *v2, Matrix< scalar_t > &A, int priority)
 Implements task type 2 in the tridiagonal bulge chasing algorithm, updating an off-diagonal block, which creates a bulge, then bringing its first column back to the original bandwidth.
 
template<typename scalar_t >
void hebr3 (internal::TargetType< Target::HostTask >, int64_t n, scalar_t *v, HermitianMatrix< scalar_t > &A, int priority)
 Implements task type 3 in the tridiagonal bulge chasing algorithm, updating a diagonal block with a 2-sided Householder transformation.
 
template void hebr1< Target::HostTask, float > (int64_t n, float *v1, HermitianMatrix< float > &&A, int priority)
 
template void hebr1< Target::HostTask, double > (int64_t n, double *v1, HermitianMatrix< double > &&A, int priority)
 
template void hebr1< Target::HostTask, std::complex< float > > (int64_t n, std::complex< float > *v1, HermitianMatrix< std::complex< float > > &&A, int priority)
 
template void hebr1< Target::HostTask, std::complex< double > > (int64_t n, std::complex< double > *v1, HermitianMatrix< std::complex< double > > &&A, int priority)
 
template void hebr2< Target::HostTask, float > (int64_t n1, float *v1, int64_t n2, float *v2, Matrix< float > &&A, int priority)
 
template void hebr2< Target::HostTask, double > (int64_t n1, double *v1, int64_t n2, double *v2, Matrix< double > &&A, int priority)
 
template void hebr2< Target::HostTask, std::complex< float > > (int64_t n1, std::complex< float > *v1, int64_t n2, std::complex< float > *v2, Matrix< std::complex< float > > &&A, int priority)
 
template void hebr2< Target::HostTask, std::complex< double > > (int64_t n1, std::complex< double > *v1, int64_t n2, std::complex< double > *v2, Matrix< std::complex< double > > &&A, int priority)
 
template void hebr3< Target::HostTask, float > (int64_t n, float *v, HermitianMatrix< float > &&A, int priority)
 
template void hebr3< Target::HostTask, double > (int64_t n, double *v, HermitianMatrix< double > &&A, int priority)
 
template void hebr3< Target::HostTask, std::complex< float > > (int64_t n, std::complex< float > *v, HermitianMatrix< std::complex< float > > &&A, int priority)
 
template void hebr3< Target::HostTask, std::complex< double > > (int64_t n, std::complex< double > *v, HermitianMatrix< std::complex< double > > &&A, int priority)
 
template<typename scalar_t >
void hegst (internal::TargetType< Target::HostTask >, int64_t itype, HermitianMatrix< scalar_t > &A, HermitianMatrix< scalar_t > &B)
 Reduces a complex Hermitian positive-definite generalized eigenvalue problem to standard form, for a single tile, host implementation.
 
template void hegst< Target::HostTask, float > (int64_t itype, HermitianMatrix< float > &&A, HermitianMatrix< float > &&B)
 
template void hegst< Target::HostTask, double > (int64_t itype, HermitianMatrix< double > &&A, HermitianMatrix< double > &&B)
 
template void hegst< Target::HostTask, std::complex< float > > (int64_t itype, HermitianMatrix< std::complex< float > > &&A, HermitianMatrix< std::complex< float > > &&B)
 
template void hegst< Target::HostTask, std::complex< double > > (int64_t itype, HermitianMatrix< std::complex< double > > &&A, HermitianMatrix< std::complex< double > > &&B)
 
template<typename scalar_t >
void hemm (internal::TargetType< Target::HostTask >, Side side, scalar_t alpha, HermitianMatrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, int priority)
 Hermitian matrix multiply to update trailing matrix.
 
template<typename scalar_t >
void hemm (internal::TargetType< Target::HostNest >, Side side, scalar_t alpha, HermitianMatrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, int priority)
 Hermitian matrix multiply to update trailing matrix.
 
template void hemm< Target::HostTask, float > (Side side, float alpha, HermitianMatrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, int priority)
 
template void hemm< Target::HostNest, float > (Side side, float alpha, HermitianMatrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, int priority)
 
template void hemm< Target::HostTask, double > (Side side, double alpha, HermitianMatrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, int priority)
 
template void hemm< Target::HostNest, double > (Side side, double alpha, HermitianMatrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, int priority)
 
template void hemm< Target::HostTask, std::complex< float > > (Side side, std::complex< float > alpha, HermitianMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, int priority)
 
template void hemm< Target::HostNest, std::complex< float > > (Side side, std::complex< float > alpha, HermitianMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, int priority)
 
template void hemm< Target::HostTask, std::complex< double > > (Side side, std::complex< double > alpha, HermitianMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, int priority)
 
template void hemm< Target::HostNest, std::complex< double > > (Side side, std::complex< double > alpha, HermitianMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, int priority)
 
template<typename scalar_t >
void hemmA (internal::TargetType< Target::HostTask >, Side side, scalar_t alpha, HermitianMatrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, int priority)
 Hermitian matrix multiply to update trailing matrix.
 
template<typename scalar_t >
void hemmA (internal::TargetType< Target::HostNest >, Side side, scalar_t alpha, HermitianMatrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, int priority)
 Hermitian matrix multiply to update trailing matrix.
 
template void hemmA< Target::HostTask, float > (Side side, float alpha, HermitianMatrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, int priority)
 
template void hemmA< Target::HostNest, float > (Side side, float alpha, HermitianMatrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, int priority)
 
template void hemmA< Target::HostTask, double > (Side side, double alpha, HermitianMatrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, int priority)
 
template void hemmA< Target::HostNest, double > (Side side, double alpha, HermitianMatrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, int priority)
 
template void hemmA< Target::HostTask, std::complex< float > > (Side side, std::complex< float > alpha, HermitianMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, int priority)
 
template void hemmA< Target::HostNest, std::complex< float > > (Side side, std::complex< float > alpha, HermitianMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, int priority)
 
template void hemmA< Target::HostTask, std::complex< double > > (Side side, std::complex< double > alpha, HermitianMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, int priority)
 
template void hemmA< Target::HostNest, std::complex< double > > (Side side, std::complex< double > alpha, HermitianMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, int priority)
 
template<typename scalar_t >
void norm (internal::TargetType< Target::HostTask >, Norm in_norm, NormScope scope, HermitianMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Hermitian matrix norm.
 
template<typename scalar_t >
void norm (internal::TargetType< Target::HostNest >, Norm in_norm, NormScope scope, HermitianMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Hermitian matrix norm.
 
template<typename scalar_t >
void norm (internal::TargetType< Target::Devices >, Norm in_norm, NormScope scope, HermitianMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Hermitian matrix norm.
 
template void norm< Target::HostTask, float > (Norm in_norm, NormScope scope, HermitianMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostNest, float > (Norm in_norm, NormScope scope, HermitianMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::Devices, float > (Norm in_norm, NormScope scope, HermitianMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostTask, double > (Norm in_norm, NormScope scope, HermitianMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostNest, double > (Norm in_norm, NormScope scope, HermitianMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::Devices, double > (Norm in_norm, NormScope scope, HermitianMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostTask, std::complex< float > > (Norm in_norm, NormScope scope, HermitianMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostNest, std::complex< float > > (Norm in_norm, NormScope scope, HermitianMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::Devices, std::complex< float > > (Norm in_norm, NormScope scope, HermitianMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostTask, std::complex< double > > (Norm in_norm, NormScope scope, HermitianMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostNest, std::complex< double > > (Norm in_norm, NormScope scope, HermitianMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::Devices, std::complex< double > > (Norm in_norm, NormScope scope, HermitianMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template<typename scalar_t >
void her2k (internal::TargetType< Target::HostTask >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Hermitian rank-2k update of single block column (i.e., k = nb).
 
template<typename scalar_t >
void her2k (internal::TargetType< Target::HostNest >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Hermitian rank-2k update of single block column (i.e., k = nb).
 
template<typename scalar_t >
void her2k (internal::TargetType< Target::HostBatch >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Hermitian rank-2k update of single block column (i.e., k = nb).
 
template<typename scalar_t >
void her2k (internal::TargetType< Target::Devices >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Hermitian rank-2k update of single block column (i.e., k = nb).
 
template void her2k< Target::HostTask, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, HermitianMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::HostNest, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, HermitianMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::HostBatch, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, HermitianMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::Devices, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, HermitianMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::HostTask, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, HermitianMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::HostNest, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, HermitianMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::HostBatch, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, HermitianMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::Devices, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, HermitianMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::HostTask, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, float beta, HermitianMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::HostNest, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, float beta, HermitianMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::HostBatch, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, float beta, HermitianMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::Devices, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, float beta, HermitianMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::HostTask, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, double beta, HermitianMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::HostNest, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, double beta, HermitianMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::HostBatch, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, double beta, HermitianMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template void her2k< Target::Devices, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, double beta, HermitianMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template<typename scalar_t >
void herk (internal::TargetType< Target::HostTask >, blas::real_type< scalar_t > alpha, Matrix< scalar_t > &A, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Hermitian rank-k update of single block column (i.e., k = nb).
 
template<typename scalar_t >
void herk (internal::TargetType< Target::HostNest >, blas::real_type< scalar_t > alpha, Matrix< scalar_t > &A, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Hermitian rank-k update of single block column (i.e., k = nb).
 
template<typename scalar_t >
void herk (internal::TargetType< Target::HostBatch >, blas::real_type< scalar_t > alpha, Matrix< scalar_t > &A, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Hermitian rank-k update of single block column (i.e., k = nb).
 
template<typename scalar_t >
void herk (internal::TargetType< Target::Devices >, blas::real_type< scalar_t > alpha, Matrix< scalar_t > &A, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Hermitian rank-k update of single block column (i.e., k = nb).
 
template void herk< Target::HostTask, float > (float alpha, Matrix< float > &&A, float beta, HermitianMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::HostNest, float > (float alpha, Matrix< float > &&A, float beta, HermitianMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::HostBatch, float > (float alpha, Matrix< float > &&A, float beta, HermitianMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::Devices, float > (float alpha, Matrix< float > &&A, float beta, HermitianMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::HostTask, double > (double alpha, Matrix< double > &&A, double beta, HermitianMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::HostNest, double > (double alpha, Matrix< double > &&A, double beta, HermitianMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::HostBatch, double > (double alpha, Matrix< double > &&A, double beta, HermitianMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::Devices, double > (double alpha, Matrix< double > &&A, double beta, HermitianMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::HostTask, std::complex< float > > (float alpha, Matrix< std::complex< float > > &&A, float beta, HermitianMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::HostNest, std::complex< float > > (float alpha, Matrix< std::complex< float > > &&A, float beta, HermitianMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::HostBatch, std::complex< float > > (float alpha, Matrix< std::complex< float > > &&A, float beta, HermitianMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::Devices, std::complex< float > > (float alpha, Matrix< std::complex< float > > &&A, float beta, HermitianMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::HostTask, std::complex< double > > (double alpha, Matrix< std::complex< double > > &&A, double beta, HermitianMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::HostNest, std::complex< double > > (double alpha, Matrix< std::complex< double > > &&A, double beta, HermitianMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::HostBatch, std::complex< double > > (double alpha, Matrix< std::complex< double > > &&A, double beta, HermitianMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template void herk< Target::Devices, std::complex< double > > (double alpha, Matrix< std::complex< double > > &&A, double beta, HermitianMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template<typename scalar_t >
void hettmqr (internal::TargetType< Target::HostTask >, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &T, HermitianMatrix< scalar_t > &C, int tag_base)
 Distributed multiplication of a Hermitian matrix on the left and right by Q from the QR triangle-triangle factorization of a column of tiles.
 
template void hettmqr< Target::HostTask, float > (Op op, Matrix< float > &&V, Matrix< float > &&T, HermitianMatrix< float > &&C, int tag)
 
template void hettmqr< Target::HostTask, double > (Op op, Matrix< double > &&V, Matrix< double > &&T, HermitianMatrix< double > &&C, int tag)
 
template void hettmqr< Target::HostTask, std::complex< float > > (Op op, Matrix< std::complex< float > > &&V, Matrix< std::complex< float > > &&T, HermitianMatrix< std::complex< float > > &&C, int tag)
 
template void hettmqr< Target::HostTask, std::complex< double > > (Op op, Matrix< std::complex< double > > &&V, Matrix< std::complex< double > > &&T, HermitianMatrix< std::complex< double > > &&C, int tag)
 
template<typename scalar_t >
void norm1est_altsgn (Matrix< scalar_t > &A)
 An auxiliary routine that sets the entries of a vector, alternating the signs of the entries.
 
template<typename scalar_t >
void norm1est_set (Matrix< int64_t > &isgn, Matrix< scalar_t > &A)
 An auxiliary routine to replace each entry of a vector by its sign (+1 or -1): for each entry, $a_i = \begin{cases} 1.0, & \text{if } a_i \ge 0, \\ -1.0, & \text{if } a_i < 0. \end{cases}$ The sign of each entry is also stored in the isgn vector.
 
template void norm1est< float > (Matrix< float > &X, Matrix< float > &V, Matrix< int64_t > &isgn, float *est, int *kase, std::vector< int64_t > &isave)
 
template void norm1est< double > (Matrix< double > &X, Matrix< double > &V, Matrix< int64_t > &isgn, double *est, int *kase, std::vector< int64_t > &isave)
 
template void norm1est< std::complex< float > > (Matrix< std::complex< float > > &X, Matrix< std::complex< float > > &V, Matrix< int64_t > &isgn, float *est, int *kase, std::vector< int64_t > &isave)
 
template void norm1est< std::complex< double > > (Matrix< std::complex< double > > &X, Matrix< std::complex< double > > &V, Matrix< int64_t > &isgn, double *est, int *kase, std::vector< int64_t > &isave)
 
template<typename scalar_t >
int64_t potrf (internal::TargetType< Target::HostTask >, HermitianMatrix< scalar_t > &A, int priority, int64_t queue_index, lapack::device_info_int *device_info)
 Cholesky factorization of single tile, host implementation.
 
template<typename scalar_t >
int64_t potrf (internal::TargetType< Target::Devices >, HermitianMatrix< scalar_t > &A, int priority, int64_t queue_index, lapack::device_info_int *device_info)
 Cholesky factorization of single tile, device implementation.
 
template int64_t potrf< Target::HostTask, float > (HermitianMatrix< float > &&A, int priority, int64_t queue_index, lapack::device_info_int *device_info)
 
template int64_t potrf< Target::HostTask, double > (HermitianMatrix< double > &&A, int priority, int64_t queue_index, lapack::device_info_int *device_info)
 
template int64_t potrf< Target::HostTask, std::complex< float > > (HermitianMatrix< std::complex< float > > &&A, int priority, int64_t queue_index, lapack::device_info_int *device_info)
 
template int64_t potrf< Target::HostTask, std::complex< double > > (HermitianMatrix< std::complex< double > > &&A, int priority, int64_t queue_index, lapack::device_info_int *device_info)
 
template int64_t potrf< Target::Devices, float > (HermitianMatrix< float > &&A, int priority, int64_t queue_index, lapack::device_info_int *device_info)
 
template int64_t potrf< Target::Devices, double > (HermitianMatrix< double > &&A, int priority, int64_t queue_index, lapack::device_info_int *device_info)
 
template int64_t potrf< Target::Devices, std::complex< float > > (HermitianMatrix< std::complex< float > > &&A, int priority, int64_t queue_index, lapack::device_info_int *device_info)
 
template int64_t potrf< Target::Devices, std::complex< double > > (HermitianMatrix< std::complex< double > > &&A, int priority, int64_t queue_index, lapack::device_info_int *device_info)
 
template<typename scalar_t >
void rbt_fill (Matrix< scalar_t > &U, const int64_t seed)
 Allocates and fills a random butterfly transform in packed storage.
 
template std::pair< Matrix< float >, Matrix< float > > rbt_generate (const Matrix< float > &, const int64_t, const int64_t)
 
template std::pair< Matrix< double >, Matrix< double > > rbt_generate (const Matrix< double > &, const int64_t, const int64_t)
 
template std::pair< Matrix< std::complex< float > >, Matrix< std::complex< float > > > rbt_generate (const Matrix< std::complex< float > > &, const int64_t, const int64_t)
 
template std::pair< Matrix< std::complex< double > >, Matrix< std::complex< double > > > rbt_generate (const Matrix< std::complex< double > > &, const int64_t, const int64_t)
 
void makeParallelPivot (Direction direction, std::vector< Pivot > const &pivot, std::map< Pivot, Pivot > &pivot_map)
 Converts serial pivot vector to parallel pivot map.
 
template<typename scalar_t >
void permuteRows (internal::TargetType< Target::HostTask >, Direction direction, Matrix< scalar_t > &A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag_base, int queue_index)
 Permutes rows of a general matrix according to the pivot vector.
 
template<typename scalar_t >
void permuteRows (internal::TargetType< Target::HostNest >, Direction direction, Matrix< scalar_t > &A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag_base, int queue_index)
 
template<typename scalar_t >
void permuteRows (internal::TargetType< Target::HostBatch >, Direction direction, Matrix< scalar_t > &A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag_base, int queue_index)
 
template<typename scalar_t >
void permuteRows (internal::TargetType< Target::Devices >, Direction direction, Matrix< scalar_t > &A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag_base, int queue_index)
 Permutes rows of a general matrix according to the pivot vector.
 
template<typename scalar_t >
void swapRow (int64_t j_offset, int64_t n, HermitianMatrix< scalar_t > &A, Op op1, std::tuple< int64_t, int64_t > &&ij_tuple_1, int64_t offset_i1, Op op2, std::tuple< int64_t, int64_t > &&ij_tuple_2, int64_t offset_i2, int tag)
 Swap a partial row of two tiles, either locally or remotely.
 
template<typename scalar_t >
void swapElement (HermitianMatrix< scalar_t > &A, std::tuple< int64_t, int64_t > &&ij_tuple_1, int64_t offset_i1, int64_t offset_j1, std::tuple< int64_t, int64_t > &&ij_tuple_2, int64_t offset_i2, int64_t offset_j2, int tag)
 Swap a single element of two tiles, either locally or remotely.
 
template<typename scalar_t >
void permuteRowsCols (internal::TargetType< Target::HostTask >, Direction direction, HermitianMatrix< scalar_t > &A, std::vector< Pivot > &pivot, int priority, int tag)
 Permutes rows and cols, symmetrically, of a Hermitian matrix according to the pivot vector.
 
template void permuteRows< Target::HostTask, float > (Direction direction, Matrix< float > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::HostNest, float > (Direction direction, Matrix< float > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::HostBatch, float > (Direction direction, Matrix< float > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::Devices, float > (Direction direction, Matrix< float > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::HostTask, double > (Direction direction, Matrix< double > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::HostNest, double > (Direction direction, Matrix< double > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::HostBatch, double > (Direction direction, Matrix< double > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::Devices, double > (Direction direction, Matrix< double > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::HostTask, std::complex< float > > (Direction direction, Matrix< std::complex< float > > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::HostNest, std::complex< float > > (Direction direction, Matrix< std::complex< float > > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::HostBatch, std::complex< float > > (Direction direction, Matrix< std::complex< float > > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::Devices, std::complex< float > > (Direction direction, Matrix< std::complex< float > > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::HostTask, std::complex< double > > (Direction direction, Matrix< std::complex< double > > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::HostNest, std::complex< double > > (Direction direction, Matrix< std::complex< double > > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::HostBatch, std::complex< double > > (Direction direction, Matrix< std::complex< double > > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRows< Target::Devices, std::complex< double > > (Direction direction, Matrix< std::complex< double > > &&A, std::vector< Pivot > &pivot, Layout layout, int priority, int tag, int queue_index)
 
template void permuteRowsCols< Target::HostTask, float > (Direction direction, HermitianMatrix< float > &&A, std::vector< Pivot > &pivot, int priority, int tag)
 
template void permuteRowsCols< Target::HostTask, double > (Direction direction, HermitianMatrix< double > &&A, std::vector< Pivot > &pivot, int priority, int tag)
 
template void permuteRowsCols< Target::HostTask, std::complex< float > > (Direction direction, HermitianMatrix< std::complex< float > > &&A, std::vector< Pivot > &pivot, int priority, int tag)
 
template void permuteRowsCols< Target::HostTask, std::complex< double > > (Direction direction, HermitianMatrix< std::complex< double > > &&A, std::vector< Pivot > &pivot, int priority, int tag)
 
template<typename scalar_t >
void symm (internal::TargetType< Target::HostTask >, Side side, scalar_t alpha, SymmetricMatrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, int priority)
 Symmetric matrix multiply to update trailing matrix.
 
template<typename scalar_t >
void symm (internal::TargetType< Target::HostNest >, Side side, scalar_t alpha, SymmetricMatrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, int priority)
 Symmetric matrix multiply to update trailing matrix.
 
template void symm< Target::HostTask, float > (Side side, float alpha, SymmetricMatrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, int priority)
 
template void symm< Target::HostNest, float > (Side side, float alpha, SymmetricMatrix< float > &&A, Matrix< float > &&B, float beta, Matrix< float > &&C, int priority)
 
template void symm< Target::HostTask, double > (Side side, double alpha, SymmetricMatrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, int priority)
 
template void symm< Target::HostNest, double > (Side side, double alpha, SymmetricMatrix< double > &&A, Matrix< double > &&B, double beta, Matrix< double > &&C, int priority)
 
template void symm< Target::HostTask, std::complex< float > > (Side side, std::complex< float > alpha, SymmetricMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, int priority)
 
template void symm< Target::HostNest, std::complex< float > > (Side side, std::complex< float > alpha, SymmetricMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, Matrix< std::complex< float > > &&C, int priority)
 
template void symm< Target::HostTask, std::complex< double > > (Side side, std::complex< double > alpha, SymmetricMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, int priority)
 
template void symm< Target::HostNest, std::complex< double > > (Side side, std::complex< double > alpha, SymmetricMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, Matrix< std::complex< double > > &&C, int priority)
 
template<typename scalar_t >
void norm (internal::TargetType< Target::HostTask >, Norm in_norm, NormScope scope, SymmetricMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Symmetric matrix norm.
 
template<typename scalar_t >
void norm (internal::TargetType< Target::HostNest >, Norm in_norm, NormScope scope, SymmetricMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Symmetric matrix norm.
 
template<typename scalar_t >
void norm (internal::TargetType< Target::Devices >, Norm in_norm, NormScope scope, SymmetricMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Symmetric matrix norm.
 
template void norm< Target::HostTask, float > (Norm in_norm, NormScope scope, SymmetricMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostNest, float > (Norm in_norm, NormScope scope, SymmetricMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::Devices, float > (Norm in_norm, NormScope scope, SymmetricMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostTask, double > (Norm in_norm, NormScope scope, SymmetricMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostNest, double > (Norm in_norm, NormScope scope, SymmetricMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::Devices, double > (Norm in_norm, NormScope scope, SymmetricMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostTask, std::complex< float > > (Norm in_norm, NormScope scope, SymmetricMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostNest, std::complex< float > > (Norm in_norm, NormScope scope, SymmetricMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::Devices, std::complex< float > > (Norm in_norm, NormScope scope, SymmetricMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostTask, std::complex< double > > (Norm in_norm, NormScope scope, SymmetricMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostNest, std::complex< double > > (Norm in_norm, NormScope scope, SymmetricMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::Devices, std::complex< double > > (Norm in_norm, NormScope scope, SymmetricMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template<typename scalar_t >
void syr2k (internal::TargetType< Target::HostTask >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, SymmetricMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Symmetric rank-2k update of single block column (i.e., k = nb).
 
template<typename scalar_t >
void syr2k (internal::TargetType< Target::HostNest >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, SymmetricMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Symmetric rank-2k update of single block column (i.e., k = nb).
 
template<typename scalar_t >
void syr2k (internal::TargetType< Target::HostBatch >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, SymmetricMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Symmetric rank-2k update of single block column (i.e., k = nb).
 
template<typename scalar_t >
void syr2k (internal::TargetType< Target::Devices >, scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, SymmetricMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Symmetric rank-2k update of single block column (i.e., k = nb).
 
template void syr2k< Target::HostTask, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, SymmetricMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::HostNest, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, SymmetricMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::HostBatch, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, SymmetricMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::Devices, float > (float alpha, Matrix< float > &&A, Matrix< float > &&B, float beta, SymmetricMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::HostTask, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, SymmetricMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::HostNest, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, SymmetricMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::HostBatch, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, SymmetricMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::Devices, double > (double alpha, Matrix< double > &&A, Matrix< double > &&B, double beta, SymmetricMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::HostTask, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, SymmetricMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::HostNest, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, SymmetricMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::HostBatch, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, SymmetricMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::Devices, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, std::complex< float > beta, SymmetricMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::HostTask, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, SymmetricMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::HostNest, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, SymmetricMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::HostBatch, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, SymmetricMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template void syr2k< Target::Devices, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, std::complex< double > beta, SymmetricMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template<typename scalar_t >
void syrk (internal::TargetType< Target::HostTask >, scalar_t alpha, Matrix< scalar_t > &A, scalar_t beta, SymmetricMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Symmetric rank-k update of single block column (i.e., k = nb).
 
template<typename scalar_t >
void syrk (internal::TargetType< Target::HostNest >, scalar_t alpha, Matrix< scalar_t > &A, scalar_t beta, SymmetricMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Symmetric rank-k update of single block column (i.e., k = nb).
 
template<typename scalar_t >
void syrk (internal::TargetType< Target::HostBatch >, scalar_t alpha, Matrix< scalar_t > &A, scalar_t beta, SymmetricMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Symmetric rank-k update of single block column (i.e., k = nb).
 
template<typename scalar_t >
void syrk (internal::TargetType< Target::Devices >, scalar_t alpha, Matrix< scalar_t > &A, scalar_t beta, SymmetricMatrix< scalar_t > &C, int priority, int queue_index, Layout layout)
 Symmetric rank-k update of single block column (i.e., k = nb).
 
template void syrk< Target::HostTask, float > (float alpha, Matrix< float > &&A, float beta, SymmetricMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::HostNest, float > (float alpha, Matrix< float > &&A, float beta, SymmetricMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::HostBatch, float > (float alpha, Matrix< float > &&A, float beta, SymmetricMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::Devices, float > (float alpha, Matrix< float > &&A, float beta, SymmetricMatrix< float > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::HostTask, double > (double alpha, Matrix< double > &&A, double beta, SymmetricMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::HostNest, double > (double alpha, Matrix< double > &&A, double beta, SymmetricMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::HostBatch, double > (double alpha, Matrix< double > &&A, double beta, SymmetricMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::Devices, double > (double alpha, Matrix< double > &&A, double beta, SymmetricMatrix< double > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::HostTask, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, std::complex< float > beta, SymmetricMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::HostNest, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, std::complex< float > beta, SymmetricMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::HostBatch, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, std::complex< float > beta, SymmetricMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::Devices, std::complex< float > > (std::complex< float > alpha, Matrix< std::complex< float > > &&A, std::complex< float > beta, SymmetricMatrix< std::complex< float > > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::HostTask, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, std::complex< double > beta, SymmetricMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::HostNest, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, std::complex< double > beta, SymmetricMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::HostBatch, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, std::complex< double > beta, SymmetricMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template void syrk< Target::Devices, std::complex< double > > (std::complex< double > alpha, Matrix< std::complex< double > > &&A, std::complex< double > beta, SymmetricMatrix< std::complex< double > > &&C, int priority, int queue_index, Layout layout)
 
template<typename scalar_t >
void trmm (internal::TargetType< Target::HostTask >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<typename scalar_t >
void trmm (internal::TargetType< Target::HostNest >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<typename scalar_t >
void trmm (internal::TargetType< Target::HostBatch >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template<typename scalar_t >
void trmm (internal::TargetType< Target::Devices >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, int64_t queue_index)
 Triangular matrix multiply.
 
template void trmm< Target::HostTask, float > (Side side, float alpha, TriangularMatrix< float > &&A, Matrix< float > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::HostTask, double > (Side side, double alpha, TriangularMatrix< double > &&A, Matrix< double > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::HostTask, std::complex< float > > (Side side, std::complex< float > alpha, TriangularMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::HostTask, std::complex< double > > (Side side, std::complex< double > alpha, TriangularMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::HostNest, float > (Side side, float alpha, TriangularMatrix< float > &&A, Matrix< float > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::HostNest, double > (Side side, double alpha, TriangularMatrix< double > &&A, Matrix< double > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::HostNest, std::complex< float > > (Side side, std::complex< float > alpha, TriangularMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::HostNest, std::complex< double > > (Side side, std::complex< double > alpha, TriangularMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::HostBatch, float > (Side side, float alpha, TriangularMatrix< float > &&A, Matrix< float > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::HostBatch, double > (Side side, double alpha, TriangularMatrix< double > &&A, Matrix< double > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::HostBatch, std::complex< float > > (Side side, std::complex< float > alpha, TriangularMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::HostBatch, std::complex< double > > (Side side, std::complex< double > alpha, TriangularMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::Devices, float > (Side side, float alpha, TriangularMatrix< float > &&A, Matrix< float > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::Devices, double > (Side side, double alpha, TriangularMatrix< double > &&A, Matrix< double > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::Devices, std::complex< float > > (Side side, std::complex< float > alpha, TriangularMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, int priority, int64_t queue_index)
 
template void trmm< Target::Devices, std::complex< double > > (Side side, std::complex< double > alpha, TriangularMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, int priority, int64_t queue_index)
 
template<typename scalar_t >
void norm (internal::TargetType< Target::HostTask >, Norm in_norm, NormScope scope, TrapezoidMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Trapezoid and triangular matrix norm.
 
template<typename scalar_t >
void norm (internal::TargetType< Target::HostNest >, Norm in_norm, NormScope scope, TrapezoidMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Trapezoid and triangular matrix norm.
 
template<typename scalar_t >
void norm (internal::TargetType< Target::Devices >, Norm in_norm, NormScope scope, TrapezoidMatrix< scalar_t > &A, blas::real_type< scalar_t > *values, int priority, int queue_index)
 Trapezoid and triangular matrix norm.
 
template void norm< Target::HostTask, float > (Norm in_norm, NormScope scope, TrapezoidMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostNest, float > (Norm in_norm, NormScope scope, TrapezoidMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::Devices, float > (Norm in_norm, NormScope scope, TrapezoidMatrix< float > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostTask, double > (Norm in_norm, NormScope scope, TrapezoidMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostNest, double > (Norm in_norm, NormScope scope, TrapezoidMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::Devices, double > (Norm in_norm, NormScope scope, TrapezoidMatrix< double > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostTask, std::complex< float > > (Norm in_norm, NormScope scope, TrapezoidMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostNest, std::complex< float > > (Norm in_norm, NormScope scope, TrapezoidMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::Devices, std::complex< float > > (Norm in_norm, NormScope scope, TrapezoidMatrix< std::complex< float > > &&A, float *values, int priority, int queue_index)
 
template void norm< Target::HostTask, std::complex< double > > (Norm in_norm, NormScope scope, TrapezoidMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::HostNest, std::complex< double > > (Norm in_norm, NormScope scope, TrapezoidMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template void norm< Target::Devices, std::complex< double > > (Norm in_norm, NormScope scope, TrapezoidMatrix< std::complex< double > > &&A, double *values, int priority, int queue_index)
 
template<typename scalar_t >
void trsm (internal::TargetType< Target::HostTask >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
 Triangular solve matrix (multiple right-hand sides).
 
template<typename scalar_t >
void trsm (internal::TargetType< Target::HostNest >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
 Triangular solve matrix (multiple right-hand sides).
 
template<typename scalar_t >
void trsm (internal::TargetType< Target::HostBatch >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
 Triangular solve matrix (multiple right-hand sides).
 
template<typename scalar_t >
void trsm (internal::TargetType< Target::Devices >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
 Triangular solve matrix (multiple right-hand sides).
 
template void trsm< Target::HostTask, float > (Side side, float alpha, TriangularMatrix< float > &&A, Matrix< float > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::HostNest, float > (Side side, float alpha, TriangularMatrix< float > &&A, Matrix< float > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::HostBatch, float > (Side side, float alpha, TriangularMatrix< float > &&A, Matrix< float > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::Devices, float > (Side side, float alpha, TriangularMatrix< float > &&A, Matrix< float > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::HostTask, double > (Side side, double alpha, TriangularMatrix< double > &&A, Matrix< double > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::HostNest, double > (Side side, double alpha, TriangularMatrix< double > &&A, Matrix< double > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::HostBatch, double > (Side side, double alpha, TriangularMatrix< double > &&A, Matrix< double > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::Devices, double > (Side side, double alpha, TriangularMatrix< double > &&A, Matrix< double > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::HostTask, std::complex< float > > (Side side, std::complex< float > alpha, TriangularMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::HostNest, std::complex< float > > (Side side, std::complex< float > alpha, TriangularMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::HostBatch, std::complex< float > > (Side side, std::complex< float > alpha, TriangularMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::Devices, std::complex< float > > (Side side, std::complex< float > alpha, TriangularMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::HostTask, std::complex< double > > (Side side, std::complex< double > alpha, TriangularMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::HostNest, std::complex< double > > (Side side, std::complex< double > alpha, TriangularMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::HostBatch, std::complex< double > > (Side side, std::complex< double > alpha, TriangularMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsm< Target::Devices, std::complex< double > > (Side side, std::complex< double > alpha, TriangularMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, int priority, Layout layout, int64_t queue_index)
 
template<typename scalar_t >
void trsmA (internal::TargetType< Target::HostTask >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
 Triangular solve matrix (multiple right-hand sides).
 
template<typename scalar_t >
void trsmA (internal::TargetType< Target::HostNest >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
 Triangular solve matrix (multiple right-hand sides).
 
template<typename scalar_t >
void trsmA (internal::TargetType< Target::HostBatch >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
 Triangular solve matrix (multiple right-hand sides).
 
template<typename scalar_t >
void trsmA (internal::TargetType< Target::Devices >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
 Triangular solve matrix (multiple right-hand sides).
 
template void trsmA< Target::HostTask, float > (Side side, float alpha, TriangularMatrix< float > &&A, Matrix< float > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::HostNest, float > (Side side, float alpha, TriangularMatrix< float > &&A, Matrix< float > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::HostBatch, float > (Side side, float alpha, TriangularMatrix< float > &&A, Matrix< float > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::Devices, float > (Side side, float alpha, TriangularMatrix< float > &&A, Matrix< float > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::HostTask, double > (Side side, double alpha, TriangularMatrix< double > &&A, Matrix< double > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::HostNest, double > (Side side, double alpha, TriangularMatrix< double > &&A, Matrix< double > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::HostBatch, double > (Side side, double alpha, TriangularMatrix< double > &&A, Matrix< double > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::Devices, double > (Side side, double alpha, TriangularMatrix< double > &&A, Matrix< double > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::HostTask, std::complex< float > > (Side side, std::complex< float > alpha, TriangularMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::HostNest, std::complex< float > > (Side side, std::complex< float > alpha, TriangularMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::HostBatch, std::complex< float > > (Side side, std::complex< float > alpha, TriangularMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::Devices, std::complex< float > > (Side side, std::complex< float > alpha, TriangularMatrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::HostTask, std::complex< double > > (Side side, std::complex< double > alpha, TriangularMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::HostNest, std::complex< double > > (Side side, std::complex< double > alpha, TriangularMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::HostBatch, std::complex< double > > (Side side, std::complex< double > alpha, TriangularMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, int priority, Layout layout, int64_t queue_index)
 
template void trsmA< Target::Devices, std::complex< double > > (Side side, std::complex< double > alpha, TriangularMatrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&B, int priority, Layout layout, int64_t queue_index)
 
template<typename scalar_t >
void trtri (internal::TargetType< Target::HostTask >, TriangularMatrix< scalar_t > &A, int priority)
 Triangular inversion of single tile, host implementation.
 
template void trtri< Target::HostTask, float > (TriangularMatrix< float > &&A, int priority)
 
template void trtri< Target::HostTask, double > (TriangularMatrix< double > &&A, int priority)
 
template void trtri< Target::HostTask, std::complex< float > > (TriangularMatrix< std::complex< float > > &&A, int priority)
 
template void trtri< Target::HostTask, std::complex< double > > (TriangularMatrix< std::complex< double > > &&A, int priority)
 
template<typename scalar_t >
void trtrm (internal::TargetType< Target::HostTask >, TriangularMatrix< scalar_t > &A, int priority)
 Triangular matrix multiply (product of the triangular matrix with its conjugate transpose) of single tile, host implementation.
 
template void trtrm< Target::HostTask, float > (TriangularMatrix< float > &&A, int priority)
 
template void trtrm< Target::HostTask, double > (TriangularMatrix< double > &&A, int priority)
 
template void trtrm< Target::HostTask, std::complex< float > > (TriangularMatrix< std::complex< float > > &&A, int priority)
 
template void trtrm< Target::HostTask, std::complex< double > > (TriangularMatrix< std::complex< double > > &&A, int priority)
 
template<typename scalar_t >
void ttlqt (internal::TargetType< Target::HostTask >, Matrix< scalar_t > &A, Matrix< scalar_t > &T)
 Distributed LQ triangle-triangle factorization, host implementation.
 
template void ttlqt< Target::HostTask, float > (Matrix< float > &&A, Matrix< float > &&T)
 
template void ttlqt< Target::HostTask, double > (Matrix< double > &&A, Matrix< double > &&T)
 
template void ttlqt< Target::HostTask, std::complex< float > > (Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&T)
 
template void ttlqt< Target::HostTask, std::complex< double > > (Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&T)
 
template<typename scalar_t >
void ttmlq (internal::TargetType< Target::HostTask >, Side side, Op op, Matrix< scalar_t > &A, Matrix< scalar_t > &T, Matrix< scalar_t > &C, int tag)
 Distributed multiply matrix by Q from LQ triangle-triangle factorization of row of tiles, host implementation.
 
template void ttmlq< Target::HostTask, float > (Side side, Op op, Matrix< float > &&A, Matrix< float > &&T, Matrix< float > &&C, int tag)
 
template void ttmlq< Target::HostTask, double > (Side side, Op op, Matrix< double > &&A, Matrix< double > &&T, Matrix< double > &&C, int tag)
 
template void ttmlq< Target::HostTask, std::complex< float > > (Side side, Op op, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&T, Matrix< std::complex< float > > &&C, int tag)
 
template void ttmlq< Target::HostTask, std::complex< double > > (Side side, Op op, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&T, Matrix< std::complex< double > > &&C, int tag)
 
template<typename scalar_t >
void ttmqr (internal::TargetType< Target::HostTask >, Side side, Op op, Matrix< scalar_t > &A, Matrix< scalar_t > &T, Matrix< scalar_t > &C, int tag)
 Distributed multiply matrix by Q from QR triangle-triangle factorization of column of tiles, host implementation.
 
template void ttmqr< Target::HostTask, float > (Side side, Op op, Matrix< float > &&A, Matrix< float > &&T, Matrix< float > &&C, int tag)
 
template void ttmqr< Target::HostTask, double > (Side side, Op op, Matrix< double > &&A, Matrix< double > &&T, Matrix< double > &&C, int tag)
 
template void ttmqr< Target::HostTask, std::complex< float > > (Side side, Op op, Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&T, Matrix< std::complex< float > > &&C, int tag)
 
template void ttmqr< Target::HostTask, std::complex< double > > (Side side, Op op, Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&T, Matrix< std::complex< double > > &&C, int tag)
 
template<typename scalar_t >
void ttqrt (internal::TargetType< Target::HostTask >, Matrix< scalar_t > &A, Matrix< scalar_t > &T)
 Distributed QR triangle-triangle factorization, host implementation.
 
template void ttqrt< Target::HostTask, float > (Matrix< float > &&A, Matrix< float > &&T)
 
template void ttqrt< Target::HostTask, double > (Matrix< double > &&A, Matrix< double > &&T)
 
template void ttqrt< Target::HostTask, std::complex< float > > (Matrix< std::complex< float > > &&A, Matrix< std::complex< float > > &&T)
 
template void ttqrt< Target::HostTask, std::complex< double > > (Matrix< std::complex< double > > &&A, Matrix< std::complex< double > > &&T)
 
template<typename scalar_t >
void add (internal::TargetType< Target::HostTask >, scalar_t alpha, BaseTrapezoidMatrix< scalar_t > &A, scalar_t beta, BaseTrapezoidMatrix< scalar_t > &B, int priority, int queue_index)
 Trapezoidal matrix add.
 
template<typename scalar_t >
void add (internal::TargetType< Target::HostNest >, scalar_t alpha, BaseTrapezoidMatrix< scalar_t > &A, scalar_t beta, BaseTrapezoidMatrix< scalar_t > &B, int priority, int queue_index)
 
template<typename scalar_t >
void add (internal::TargetType< Target::HostBatch >, scalar_t alpha, BaseTrapezoidMatrix< scalar_t > &A, scalar_t beta, BaseTrapezoidMatrix< scalar_t > &B, int priority, int queue_index)
 
template<typename scalar_t >
void add (internal::TargetType< Target::Devices >, scalar_t alpha, BaseTrapezoidMatrix< scalar_t > &A, scalar_t beta, BaseTrapezoidMatrix< scalar_t > &B, int priority, int queue_index)
 Trapezoidal matrix add.
 
template void add< Target::HostTask, float > (float alpha, BaseTrapezoidMatrix< float > &&A, float beta, BaseTrapezoidMatrix< float > &&B, int priority, int queue_index)
 
template void add< Target::HostNest, float > (float alpha, BaseTrapezoidMatrix< float > &&A, float beta, BaseTrapezoidMatrix< float > &&B, int priority, int queue_index)
 
template void add< Target::HostBatch, float > (float alpha, BaseTrapezoidMatrix< float > &&A, float beta, BaseTrapezoidMatrix< float > &&B, int priority, int queue_index)
 
template void add< Target::Devices, float > (float alpha, BaseTrapezoidMatrix< float > &&A, float beta, BaseTrapezoidMatrix< float > &&B, int priority, int queue_index)
 
template void add< Target::HostTask, double > (double alpha, BaseTrapezoidMatrix< double > &&A, double beta, BaseTrapezoidMatrix< double > &&B, int priority, int queue_index)
 
template void add< Target::HostNest, double > (double alpha, BaseTrapezoidMatrix< double > &&A, double beta, BaseTrapezoidMatrix< double > &&B, int priority, int queue_index)
 
template void add< Target::HostBatch, double > (double alpha, BaseTrapezoidMatrix< double > &&A, double beta, BaseTrapezoidMatrix< double > &&B, int priority, int queue_index)
 
template void add< Target::Devices, double > (double alpha, BaseTrapezoidMatrix< double > &&A, double beta, BaseTrapezoidMatrix< double > &&B, int priority, int queue_index)
 
template void add< Target::HostTask, std::complex< float > > (std::complex< float > alpha, BaseTrapezoidMatrix< std::complex< float > > &&A, std::complex< float > beta, BaseTrapezoidMatrix< std::complex< float > > &&B, int priority, int queue_index)
 
template void add< Target::HostNest, std::complex< float > > (std::complex< float > alpha, BaseTrapezoidMatrix< std::complex< float > > &&A, std::complex< float > beta, BaseTrapezoidMatrix< std::complex< float > > &&B, int priority, int queue_index)
 
template void add< Target::HostBatch, std::complex< float > > (std::complex< float > alpha, BaseTrapezoidMatrix< std::complex< float > > &&A, std::complex< float > beta, BaseTrapezoidMatrix< std::complex< float > > &&B, int priority, int queue_index)
 
template void add< Target::Devices, std::complex< float > > (std::complex< float > alpha, BaseTrapezoidMatrix< std::complex< float > > &&A, std::complex< float > beta, BaseTrapezoidMatrix< std::complex< float > > &&B, int priority, int queue_index)
 
template void add< Target::HostTask, std::complex< double > > (std::complex< double > alpha, BaseTrapezoidMatrix< std::complex< double > > &&A, std::complex< double > beta, BaseTrapezoidMatrix< std::complex< double > > &&B, int priority, int queue_index)
 
template void add< Target::HostNest, std::complex< double > > (std::complex< double > alpha, BaseTrapezoidMatrix< std::complex< double > > &&A, std::complex< double > beta, BaseTrapezoidMatrix< std::complex< double > > &&B, int priority, int queue_index)
 
template void add< Target::HostBatch, std::complex< double > > (std::complex< double > alpha, BaseTrapezoidMatrix< std::complex< double > > &&A, std::complex< double > beta, BaseTrapezoidMatrix< std::complex< double > > &&B, int priority, int queue_index)
 
template void add< Target::Devices, std::complex< double > > (std::complex< double > alpha, BaseTrapezoidMatrix< std::complex< double > > &&A, std::complex< double > beta, BaseTrapezoidMatrix< std::complex< double > > &&B, int priority, int queue_index)
 
template<typename src_scalar_t , typename dst_scalar_t >
void copy (internal::TargetType< Target::HostTask >, BaseTrapezoidMatrix< src_scalar_t > &A, BaseTrapezoidMatrix< dst_scalar_t > &B, int priority, int queue_index)
 Copy and precision conversion.
 
template<typename src_scalar_t , typename dst_scalar_t >
void copy (internal::TargetType< Target::Devices >, BaseTrapezoidMatrix< src_scalar_t > &A, BaseTrapezoidMatrix< dst_scalar_t > &B, int priority, int queue_index)
 Copy and precision conversion.
 
template void copy< Target::HostTask, float, float > (BaseTrapezoidMatrix< float > &&A, BaseTrapezoidMatrix< float > &&B, int priority, int queue_index)
 
template void copy< Target::HostTask, float, double > (BaseTrapezoidMatrix< float > &&A, BaseTrapezoidMatrix< double > &&B, int priority, int queue_index)
 
template void copy< Target::Devices, float, float > (BaseTrapezoidMatrix< float > &&A, BaseTrapezoidMatrix< float > &&B, int priority, int queue_index)
 
template void copy< Target::Devices, float, double > (BaseTrapezoidMatrix< float > &&A, BaseTrapezoidMatrix< double > &&B, int priority, int queue_index)
 
template void copy< Target::HostTask, double, double > (BaseTrapezoidMatrix< double > &&A, BaseTrapezoidMatrix< double > &&B, int priority, int queue_index)
 
template void copy< Target::HostTask, double, float > (BaseTrapezoidMatrix< double > &&A, BaseTrapezoidMatrix< float > &&B, int priority, int queue_index)
 
template void copy< Target::Devices, double, double > (BaseTrapezoidMatrix< double > &&A, BaseTrapezoidMatrix< double > &&B, int priority, int queue_index)
 
template void copy< Target::Devices, double, float > (BaseTrapezoidMatrix< double > &&A, BaseTrapezoidMatrix< float > &&B, int priority, int queue_index)
 
template<typename scalar_t >
void scale (internal::TargetType< Target::HostTask >, blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, BaseTrapezoidMatrix< scalar_t > &A, int priority, int queue_index)
 Scale Trapezoid matrix entries by the real scalar numer/denom.
 
template<typename scalar_t >
void scale (internal::TargetType< Target::HostNest >, blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, BaseTrapezoidMatrix< scalar_t > &A, int priority, int queue_index)
 
template<typename scalar_t >
void scale (internal::TargetType< Target::HostBatch >, blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, BaseTrapezoidMatrix< scalar_t > &A, int priority, int queue_index)
 
template<typename scalar_t >
void scale (internal::TargetType< Target::Devices >, blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, BaseTrapezoidMatrix< scalar_t > &A, int priority, int queue_index)
 Scale Trapezoid matrix entries by the real scalar numer/denom.
 
template void scale< Target::HostTask, float > (float numer, float denom, BaseTrapezoidMatrix< float > &&A, int priority, int queue_index)
 
template void scale< Target::HostNest, float > (float numer, float denom, BaseTrapezoidMatrix< float > &&A, int priority, int queue_index)
 
template void scale< Target::HostBatch, float > (float numer, float denom, BaseTrapezoidMatrix< float > &&A, int priority, int queue_index)
 
template void scale< Target::Devices, float > (float numer, float denom, BaseTrapezoidMatrix< float > &&A, int priority, int queue_index)
 
template void scale< Target::HostTask, double > (double numer, double denom, BaseTrapezoidMatrix< double > &&A, int priority, int queue_index)
 
template void scale< Target::HostNest, double > (double numer, double denom, BaseTrapezoidMatrix< double > &&A, int priority, int queue_index)
 
template void scale< Target::HostBatch, double > (double numer, double denom, BaseTrapezoidMatrix< double > &&A, int priority, int queue_index)
 
template void scale< Target::Devices, double > (double numer, double denom, BaseTrapezoidMatrix< double > &&A, int priority, int queue_index)
 
template void scale< Target::HostTask, std::complex< float > > (float numer, float denom, BaseTrapezoidMatrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void scale< Target::HostNest, std::complex< float > > (float numer, float denom, BaseTrapezoidMatrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void scale< Target::HostBatch, std::complex< float > > (float numer, float denom, BaseTrapezoidMatrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void scale< Target::Devices, std::complex< float > > (float numer, float denom, BaseTrapezoidMatrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void scale< Target::HostTask, std::complex< double > > (double numer, double denom, BaseTrapezoidMatrix< std::complex< double > > &&A, int priority, int queue_index)
 
template void scale< Target::HostNest, std::complex< double > > (double numer, double denom, BaseTrapezoidMatrix< std::complex< double > > &&A, int priority, int queue_index)
 
template void scale< Target::HostBatch, std::complex< double > > (double numer, double denom, BaseTrapezoidMatrix< std::complex< double > > &&A, int priority, int queue_index)
 
template void scale< Target::Devices, std::complex< double > > (double numer, double denom, BaseTrapezoidMatrix< std::complex< double > > &&A, int priority, int queue_index)
 
template<typename scalar_t >
void set (internal::TargetType< Target::HostTask >, scalar_t offdiag_value, scalar_t diag_value, BaseTrapezoidMatrix< scalar_t > &A, int priority, int queue_index)
 Trapezoid matrix set.
 
template<typename scalar_t >
void set (internal::TargetType< Target::HostNest >, scalar_t offdiag_value, scalar_t diag_value, BaseTrapezoidMatrix< scalar_t > &A, int priority, int queue_index)
 
template<typename scalar_t >
void set (internal::TargetType< Target::HostBatch >, scalar_t offdiag_value, scalar_t diag_value, BaseTrapezoidMatrix< scalar_t > &A, int priority, int queue_index)
 
template<typename scalar_t >
void set (internal::TargetType< Target::Devices >, scalar_t offdiag_value, scalar_t diag_value, BaseTrapezoidMatrix< scalar_t > &A, int priority, int queue_index)
 Trapezoid matrix set.
 
template void set< Target::HostTask, float > (float offdiag_value, float diag_value, BaseTrapezoidMatrix< float > &&A, int priority, int queue_index)
 
template void set< Target::HostNest, float > (float offdiag_value, float diag_value, BaseTrapezoidMatrix< float > &&A, int priority, int queue_index)
 
template void set< Target::HostBatch, float > (float offdiag_value, float diag_value, BaseTrapezoidMatrix< float > &&A, int priority, int queue_index)
 
template void set< Target::Devices, float > (float offdiag_value, float diag_value, BaseTrapezoidMatrix< float > &&A, int priority, int queue_index)
 
template void set< Target::HostTask, double > (double offdiag_value, double diag_value, BaseTrapezoidMatrix< double > &&A, int priority, int queue_index)
 
template void set< Target::HostNest, double > (double offdiag_value, double diag_value, BaseTrapezoidMatrix< double > &&A, int priority, int queue_index)
 
template void set< Target::HostBatch, double > (double offdiag_value, double diag_value, BaseTrapezoidMatrix< double > &&A, int priority, int queue_index)
 
template void set< Target::Devices, double > (double offdiag_value, double diag_value, BaseTrapezoidMatrix< double > &&A, int priority, int queue_index)
 
template void set< Target::HostTask, std::complex< float > > (std::complex< float > offdiag_value, std::complex< float > diag_value, BaseTrapezoidMatrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void set< Target::HostNest, std::complex< float > > (std::complex< float > offdiag_value, std::complex< float > diag_value, BaseTrapezoidMatrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void set< Target::HostBatch, std::complex< float > > (std::complex< float > offdiag_value, std::complex< float > diag_value, BaseTrapezoidMatrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void set< Target::Devices, std::complex< float > > (std::complex< float > offdiag_value, std::complex< float > diag_value, BaseTrapezoidMatrix< std::complex< float > > &&A, int priority, int queue_index)
 
template void set< Target::HostTask, std::complex< double > > (std::complex< double > offdiag_value, std::complex< double > diag_value, BaseTrapezoidMatrix< std::complex< double > > &&A, int priority, int queue_index)
 
template void set< Target::HostNest, std::complex< double > > (std::complex< double > offdiag_value, std::complex< double > diag_value, BaseTrapezoidMatrix< std::complex< double > > &&A, int priority, int queue_index)
 
template void set< Target::HostBatch, std::complex< double > > (std::complex< double > offdiag_value, std::complex< double > diag_value, BaseTrapezoidMatrix< std::complex< double > > &&A, int priority, int queue_index)
 
template void set< Target::Devices, std::complex< double > > (std::complex< double > offdiag_value, std::complex< double > diag_value, BaseTrapezoidMatrix< std::complex< double > > &&A, int priority, int queue_index)
 
template<Target target, typename scalar_t >
void unmlq (internal::TargetType< target >, Side side, Op op, Matrix< scalar_t > V, Matrix< scalar_t > &T, Matrix< scalar_t > &C, Matrix< scalar_t > &W, int priority, int64_t queue_index)
 Multiply matrix by Q from local LQ factorization.
 
template void unmlq< Target::HostTask, float > (Side side, Op op, Matrix< float > &&V, Matrix< float > &&T, Matrix< float > &&C, Matrix< float > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::HostNest, float > (Side side, Op op, Matrix< float > &&V, Matrix< float > &&T, Matrix< float > &&C, Matrix< float > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::HostBatch, float > (Side side, Op op, Matrix< float > &&V, Matrix< float > &&T, Matrix< float > &&C, Matrix< float > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::Devices, float > (Side side, Op op, Matrix< float > &&V, Matrix< float > &&T, Matrix< float > &&C, Matrix< float > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::HostTask, double > (Side side, Op op, Matrix< double > &&V, Matrix< double > &&T, Matrix< double > &&C, Matrix< double > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::HostNest, double > (Side side, Op op, Matrix< double > &&V, Matrix< double > &&T, Matrix< double > &&C, Matrix< double > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::HostBatch, double > (Side side, Op op, Matrix< double > &&V, Matrix< double > &&T, Matrix< double > &&C, Matrix< double > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::Devices, double > (Side side, Op op, Matrix< double > &&V, Matrix< double > &&T, Matrix< double > &&C, Matrix< double > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::HostTask, std::complex< float > > (Side side, Op op, Matrix< std::complex< float > > &&V, Matrix< std::complex< float > > &&T, Matrix< std::complex< float > > &&C, Matrix< std::complex< float > > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::HostNest, std::complex< float > > (Side side, Op op, Matrix< std::complex< float > > &&V, Matrix< std::complex< float > > &&T, Matrix< std::complex< float > > &&C, Matrix< std::complex< float > > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::HostBatch, std::complex< float > > (Side side, Op op, Matrix< std::complex< float > > &&V, Matrix< std::complex< float > > &&T, Matrix< std::complex< float > > &&C, Matrix< std::complex< float > > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::Devices, std::complex< float > > (Side side, Op op, Matrix< std::complex< float > > &&V, Matrix< std::complex< float > > &&T, Matrix< std::complex< float > > &&C, Matrix< std::complex< float > > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::HostTask, std::complex< double > > (Side side, Op op, Matrix< std::complex< double > > &&V, Matrix< std::complex< double > > &&T, Matrix< std::complex< double > > &&C, Matrix< std::complex< double > > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::HostNest, std::complex< double > > (Side side, Op op, Matrix< std::complex< double > > &&V, Matrix< std::complex< double > > &&T, Matrix< std::complex< double > > &&C, Matrix< std::complex< double > > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::HostBatch, std::complex< double > > (Side side, Op op, Matrix< std::complex< double > > &&V, Matrix< std::complex< double > > &&T, Matrix< std::complex< double > > &&C, Matrix< std::complex< double > > &&W, int priority, int64_t queue_index)
 
template void unmlq< Target::Devices, std::complex< double > > (Side side, Op op, Matrix< std::complex< double > > &&V, Matrix< std::complex< double > > &&T, Matrix< std::complex< double > > &&C, Matrix< std::complex< double > > &&W, int priority, int64_t queue_index)
 
template<Target target, typename scalar_t >
void unmqr (internal::TargetType< target >, Side side, Op op, Matrix< scalar_t > V, Matrix< scalar_t > &T, Matrix< scalar_t > &C, Matrix< scalar_t > &W, int priority, int64_t queue_index)
 Multiply matrix by Q from local QR factorization.
 
template void unmqr< Target::HostTask, float > (Side side, Op op, Matrix< float > &&V, Matrix< float > &&T, Matrix< float > &&C, Matrix< float > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::HostNest, float > (Side side, Op op, Matrix< float > &&V, Matrix< float > &&T, Matrix< float > &&C, Matrix< float > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::HostBatch, float > (Side side, Op op, Matrix< float > &&V, Matrix< float > &&T, Matrix< float > &&C, Matrix< float > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::Devices, float > (Side side, Op op, Matrix< float > &&V, Matrix< float > &&T, Matrix< float > &&C, Matrix< float > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::HostTask, double > (Side side, Op op, Matrix< double > &&V, Matrix< double > &&T, Matrix< double > &&C, Matrix< double > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::HostNest, double > (Side side, Op op, Matrix< double > &&V, Matrix< double > &&T, Matrix< double > &&C, Matrix< double > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::HostBatch, double > (Side side, Op op, Matrix< double > &&V, Matrix< double > &&T, Matrix< double > &&C, Matrix< double > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::Devices, double > (Side side, Op op, Matrix< double > &&V, Matrix< double > &&T, Matrix< double > &&C, Matrix< double > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::HostTask, std::complex< float > > (Side side, Op op, Matrix< std::complex< float > > &&V, Matrix< std::complex< float > > &&T, Matrix< std::complex< float > > &&C, Matrix< std::complex< float > > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::HostNest, std::complex< float > > (Side side, Op op, Matrix< std::complex< float > > &&V, Matrix< std::complex< float > > &&T, Matrix< std::complex< float > > &&C, Matrix< std::complex< float > > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::HostBatch, std::complex< float > > (Side side, Op op, Matrix< std::complex< float > > &&V, Matrix< std::complex< float > > &&T, Matrix< std::complex< float > > &&C, Matrix< std::complex< float > > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::Devices, std::complex< float > > (Side side, Op op, Matrix< std::complex< float > > &&V, Matrix< std::complex< float > > &&T, Matrix< std::complex< float > > &&C, Matrix< std::complex< float > > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::HostTask, std::complex< double > > (Side side, Op op, Matrix< std::complex< double > > &&V, Matrix< std::complex< double > > &&T, Matrix< std::complex< double > > &&C, Matrix< std::complex< double > > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::HostNest, std::complex< double > > (Side side, Op op, Matrix< std::complex< double > > &&V, Matrix< std::complex< double > > &&T, Matrix< std::complex< double > > &&C, Matrix< std::complex< double > > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::HostBatch, std::complex< double > > (Side side, Op op, Matrix< std::complex< double > > &&V, Matrix< std::complex< double > > &&T, Matrix< std::complex< double > > &&C, Matrix< std::complex< double > > &&W, int priority, int64_t queue_index)
 
template void unmqr< Target::Devices, std::complex< double > > (Side side, Op op, Matrix< std::complex< double > > &&V, Matrix< std::complex< double > > &&T, Matrix< std::complex< double > > &&C, Matrix< std::complex< double > > &&W, int priority, int64_t queue_index)
 
template<Target target, typename scalar_t >
void unmtr_hb2st (internal::TargetType< target >, Side side, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &C)
 Generic implementation of unmtr_hb2st.
 
template void unmtr_hb2st< Target::HostTask, float > (Side side, Op op, Matrix< float > &V, Matrix< float > &C)
 
template void unmtr_hb2st< Target::HostTask, double > (Side side, Op op, Matrix< double > &V, Matrix< double > &C)
 
template void unmtr_hb2st< Target::HostTask, std::complex< float > > (Side side, Op op, Matrix< std::complex< float > > &V, Matrix< std::complex< float > > &C)
 
template void unmtr_hb2st< Target::HostTask, std::complex< double > > (Side side, Op op, Matrix< std::complex< double > > &V, Matrix< std::complex< double > > &C)
 
template void unmtr_hb2st< Target::Devices, float > (Side side, Op op, Matrix< float > &V, Matrix< float > &C)
 
template void unmtr_hb2st< Target::Devices, double > (Side side, Op op, Matrix< double > &V, Matrix< double > &C)
 
template void unmtr_hb2st< Target::Devices, std::complex< float > > (Side side, Op op, Matrix< std::complex< float > > &V, Matrix< std::complex< float > > &C)
 
template void unmtr_hb2st< Target::Devices, std::complex< double > > (Side side, Op op, Matrix< std::complex< double > > &V, Matrix< std::complex< double > > &C)
 
template<typename T >
T pow (T base, T exp)
 [internal] Computes the power function for integer arguments.
 
template int pow< int > (int base, int exp)
 
void mpi_max_nan (void *invec, void *inoutvec, int *len, MPI_Datatype *datatype)
 [internal] Implements a custom MPI reduction that propagates NaNs.
 
template<typename T1 , typename T2 >
bool compareSecond (std::pair< T1, T2 > const &a, std::pair< T1, T2 > const &b)
 Helper function to sort by second element of a pair.
 
template<typename scalar_t >
std::vector< int64_t > geqrf_compute_first_indices (Matrix< scalar_t > &A_panel, int64_t k)
 A helper function to find each rank's first (top-most) row in panel k for the QR-family of routines.
 
template<typename scalar_t >
std::vector< int64_t > gelqf_compute_first_indices (Matrix< scalar_t > A_panel, int64_t k)
 A helper function to find each rank's first (left-most) column in panel k for the LQ-family of routines.
 
template<typename scalar_t >
bool iterRefConverged (std::vector< scalar_t > &colnorms_R, std::vector< scalar_t > &colnorms_X, scalar_t cte)
 Helper function to check convergence in iterative methods.
 
template<typename scalar_t >
slate::Matrix< scalar_t > alloc_basis (slate::BaseMatrix< scalar_t > &A, int64_t n, Target target)
 Helper function to allocate a Krylov basis.
 
template<typename scalar_t >
std::vector< int64_t > tile_offsets (RowCol dim, slate::BaseMatrix< scalar_t > &A)
 Computes the global index for each tile.
 
template<typename T >
next_power2 (T x)
 

Detailed Description

Namespace used for SLATE internal implementation.

It is intended that application code would not call any internal SLATE functions.

Function Documentation

◆ compareSecond()

template<typename T1 , typename T2 >
bool slate::internal::compareSecond ( std::pair< T1, T2 > const &  a,
std::pair< T1, T2 > const &  b 
)
inline

Helper function to sort by second element of a pair.

Used to sort rank_rows by row (see ttqrt, ttmqr), and rank_cols by col.

Returns
True if a.second < b.second.

◆ copy_col() [1/3]

template<typename real_t >
void slate::internal::copy_col ( Matrix< real_t > &  A,
int64_t  j,
int64_t  jj,
Matrix< real_t > &  B,
int64_t  k,
int64_t  kk 
)

Copy local rows of column from matrix A, tile j, column jj, to matrix B, tile k, column kk.

A and B must have the same distribution, number of rows, and tile mb; they may differ in the number of columns.

todo: This currently assumes tiles exist on the host.

Parameters
[in]AMatrix to copy from.
[in]jBlock column to copy from.
[in]jjOffset within block column j of column to copy.
[in,out]BMatrix to copy to. On output, local rows of B.at( :, k ).at( :, kk ) = A.at( :, j ).at( :, jj ).
[in]kBlock column to copy to.
[in]kkOffset within block column k of column to copy.

◆ copy_col() [2/3]

template<typename real_t >
void slate::internal::copy_col ( Matrix< real_t > &  A,
int64_t  j,
int64_t  jj,
real_t *  x 
)

Copy local rows of column from matrix A, tile j, column jj, to vector x.

todo: This currently assumes tiles exist on the host.

Parameters
[in]AMatrix to copy from.
[in]jBlock column to copy from.
[in]jjOffset within block column j of column to copy.
[out]xVector of length >= mlocal, the local number of rows of A. On output, copy of local rows of A.at( :, j ).at( :, jj ).

◆ copy_col() [3/3]

template<typename real_t >
void slate::internal::copy_col ( real_t *  x,
Matrix< real_t > &  A,
int64_t  j,
int64_t  jj 
)

Copy local rows of column from vector x to matrix A, tile j, column jj.

todo: This currently assumes tiles exist on the host.

Parameters
[in]xVector to copy from, of length >= mlocal, the local number of rows of A.
[in,out]AMatrix to copy to. On output, local rows of A.at( :, j ).at( :, jj ) are a copy of x.
[in]jBlock column to copy to.
[in]jjOffset within block column j of column to copy.

◆ cubeBcastPattern()

void slate::internal::cubeBcastPattern ( int  size,
int  rank,
int  radix,
std::list< int > &  recv_from,
std::list< int > &  send_to 
)

[internal] Implements a hypercube broadcast pattern.

For a given rank, finds the rank to receive from and the list of ranks to forward to. Assumes rank 0 as the root of the broadcast.

Parameters
[in]sizeNumber of ranks participating in the broadcast.
[in]rankRank of the local process.
[in]radixDimension of the cube.
[out]recv_fromList containing the rank to receive from. Empty list for rank 0.
[out]send_toList of ranks to forward to.

◆ device_regions_build() [1/2]

template<bool store_diag, int mat_count, typename scalar_t , bool diag_same = !store_diag>
std::vector< device_regions_params< store_diag, mat_count > > slate::internal::device_regions_build ( std::array< std::reference_wrapper< BaseMatrix< scalar_t > >, mat_count >  mats,
std::array< scalar_t **, mat_count >  mats_array_host,
int64_t  device,
std::function< void(int64_t, int64_t, int64_t)>  extra_setup,
std::vector< int64_t > &  irange,
std::vector< int64_t > &  jrange 
)

Computes and populates the regions for the given matrices.

Template Parameters
store_diagWhether the diagonal tiles may need to be special cased
mat_countThe number of matrices used by the kernel
scalar_tThe type of the matrices

diag_sameWhether to include the diagonal tiles in the off-diagonal groups. If false, store_diag must be true.

Parameters
[in]matsAn array of the matrices to build regions for
[in]mats_array_hostAn array of the arrays to fill with pointers to device data
[in]deviceThe device to build regions for
[in]extra_setupCallback that is called whenever a tile is added to a group. The group index and the tile indices are passed as arguments
Returns
A list of batches with identical size.
Parameters
[in]irangeThe ranges of tiles with a uniform number of rows
[in]jrangeThe ranges of tiles with a uniform number of columns

◆ device_regions_build() [2/2]

template<bool store_diag, int mat_count, typename scalar_t , bool diag_same = !store_diag>
std::vector< device_regions_params< store_diag, mat_count > > slate::internal::device_regions_build ( std::array< std::reference_wrapper< BaseMatrix< scalar_t > >, mat_count >  mats,
std::array< scalar_t **, mat_count >  mats_array_host,
int64_t  device,
std::function< void(int64_t, int64_t, int64_t)>  extra_setup = {} 
)

Computes and populates the regions for the given matrices.

Template Parameters
store_diagWhether the diagonal tiles may need to be special cased
mat_countThe number of matrices used by the kernel
scalar_tThe type of the matrices

diag_sameWhether to include the diagonal tiles in the off-diagonal groups. If false, store_diag must be true.

Parameters
[in]matsAn array of the matrices to build regions for
[in]mats_array_hostAn array of the arrays to fill with pointers to device data
[in]deviceThe device to build regions for
[in]extra_setupCallback that is called whenever a tile is added to a group. The group index and the tile indices are passed as arguments
Returns
A list of batches with identical size.

◆ device_regions_range()

template<typename scalar_t >
std::vector< int64_t > slate::internal::device_regions_range ( RowCol  dim,
BaseMatrix< scalar_t > &  A 
)

Computes the range of tiles with either the same mb or the same nb.

Parameters
[in]dimWhether to compute the row ranges or the column ranges
[in]AThe matrix to get tile sizes from
Returns
The ranges of uniform tile sizes

◆ gelqf_compute_first_indices()

template<typename scalar_t >
std::vector< int64_t > slate::internal::gelqf_compute_first_indices ( Matrix< scalar_t >  A_panel,
int64_t  k 
)

A helper function to find each rank's first (left-most) column in panel k for the LQ-family of routines.

Parameters
[in]A_panelCurrent panel, which is a submatrix of the input matrix \(A\).
[in]kIndex of the current panel in the input matrix \(A\).
Returns
The array of computed indices.

◆ geqrf_compute_first_indices()

template<typename scalar_t >
std::vector< int64_t > slate::internal::geqrf_compute_first_indices ( Matrix< scalar_t > &  A_panel,
int64_t  k 
)

A helper function to find each rank's first (top-most) row in panel k for the QR-family of routines.

Parameters
[in]A_panelCurrent panel, which is a submatrix of the input matrix \(A\).
[in]kIndex of the current panel in the input matrix \(A\).
Returns
The array of computed indices.

◆ hebr1()

template<typename scalar_t >
void slate::internal::hebr1 ( internal::TargetType< Target::HostTask >,
int64_t  n,
scalar_t *  v,
HermitianMatrix< scalar_t > &  A,
int  priority 
)

Implements task type 1 in the tridiagonal bulge chasing algorithm, bringing the first column & row of A to tridiagonal.

See https://doi.org/10.1145/2063384.2063394 and http://www.icl.utk.edu/publications/swan-013 . Here, the first block starts at \((0, 0)\), not at \((1, 0)\). todo: as compared to SVD?

Parameters
[in]nLength of vector v.
[out]vThe Householder reflector to zero A[2:n-1, 0].
[in,out]AThe first block of a sweep.

◆ hebr2()

template<typename scalar_t >
void slate::internal::hebr2 ( internal::TargetType< Target::HostTask >,
int64_t  n1,
scalar_t *  v1,
int64_t  n2,
scalar_t *  v2,
Matrix< scalar_t > &  A,
int  priority 
)

Implements task type 2 in the tridiagonal bulge chasing algorithm, updating an off-diagonal block, which creates a bulge, then bringing its first column back to the original bandwidth.

Parameters
[in]n1Length of vector v1.
[in]v1The Householder reflector produced by task type 1 or 2.
[in]n2Length of vector v2.
[out]v2The Householder reflector to zero A[1:n-1, 0].
[in,out]AAn off-diagonal block in a sweep.

◆ hemmA() [1/3]

template<typename scalar_t >
void slate::internal::hemmA ( internal::TargetType< Target::HostNest >,
Side  side,
scalar_t  alpha,
HermitianMatrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
scalar_t  beta,
Matrix< scalar_t > &  C,
int  priority 
)

Hermitian matrix multiply to update trailing matrix.

Host nested OpenMP implementation.

◆ hemmA() [2/3]

template<typename scalar_t >
void slate::internal::hemmA ( internal::TargetType< Target::HostTask >,
Side  side,
scalar_t  alpha,
HermitianMatrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
scalar_t  beta,
Matrix< scalar_t > &  C,
int  priority 
)

Hermitian matrix multiply to update trailing matrix.

Host OpenMP task implementation.

◆ hemmA() [3/3]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::hemmA ( Side  side,
scalar_t  alpha,
HermitianMatrix< scalar_t > &&  A,
Matrix< scalar_t > &&  B,
scalar_t  beta,
Matrix< scalar_t > &&  C,
int  priority 
)

Hermitian matrix multiply to update trailing matrix, where A is a single tile.

If side = left, B and C are each a single block row; if side = right, B and C are each a single block col. Unlike most BLAS operations, here op(B) and op(C) must be both the same, either both NoTrans or both ConjTrans. Dispatches to target implementations.

◆ need_Bi0()

template<typename scalar_t >
bool slate::internal::need_Bi0 ( HermitianMatrix< scalar_t >  AH,
int  mpi_rank,
int64_t  i,
std::vector< int64_t > &  panel_rank_rows 
)

Determines whether this process contributes to B(i, 0).

Specifically, it checks whether there is a j in panel_rank_rows such that AH(i, j) is local (taking into account the symmetric storage).

◆ norm() [1/5]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::norm ( Norm  in_norm,
NormScope  scope,
HermitianBandMatrix< scalar_t > &&  A,
blas::real_type< scalar_t > *  values,
int  priority,
int  queue_index 
)

Hermitian banded matrix norm.

Dispatches to target implementations.

Parameters
[in]in_norm
  • Norm::Max: values is dimension 1 and contains the local max.
  • Norm::One: values is dimension n and contains the local column sum.
  • Norm::Inf: for Hermitian, same as Norm::One.
  • Norm::Fro: values is dimension 2 and contains the local scale and sum-of-squares.

◆ norm() [2/5]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::norm ( Norm  in_norm,
NormScope  scope,
HermitianMatrix< scalar_t > &&  A,
blas::real_type< scalar_t > *  values,
int  priority,
int  queue_index 
)

Hermitian matrix norm.

Dispatches to target implementations.

Parameters
[in]in_norm
  • Norm::Max: values is dimension 1 and contains the local max.
  • Norm::One: values is dimension n and contains the local column sum.
  • Norm::Inf: for Hermitian, same as Norm::One.
  • Norm::Fro: values is dimension 2 and contains the local scale and sum-of-squares.

◆ norm() [3/5]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::norm ( Norm  in_norm,
NormScope  scope,
Matrix< scalar_t > &&  A,
blas::real_type< scalar_t > *  values,
int  priority,
int  queue_index 
)

General matrix norm.

Dispatches to target implementations.

Parameters
[in]in_norm
  • Norm::Max: values is dimension 1 and contains the local max.
  • Norm::One: values is dimension n and contains the local column sum.
  • Norm::Inf: values is dimension m and contains the local row sum.
  • Norm::Fro: values is dimension 2 and contains the local scale and sum-of-squares.

◆ norm() [4/5]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::norm ( Norm  in_norm,
NormScope  scope,
SymmetricMatrix< scalar_t > &&  A,
blas::real_type< scalar_t > *  values,
int  priority,
int  queue_index 
)

Symmetric matrix norm.

Dispatches to target implementations.

Parameters
[in]in_norm
  • Norm::Max: values is dimension 1 and contains the local max.
  • Norm::One: values is dimension n and contains the local column sum.
  • Norm::Inf: for symmetric, same as Norm::One.
  • Norm::Fro: values is dimension 2 and contains the local scale and sum-of-squares.

◆ norm() [5/5]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::norm ( Norm  in_norm,
NormScope  scope,
TrapezoidMatrix< scalar_t > &&  A,
blas::real_type< scalar_t > *  values,
int  priority,
int  queue_index 
)

Trapezoid and triangular matrix norm.

Dispatches to target implementations.

Parameters
[in]in_norm
  • Norm::Max: values is dimension 1 and contains the local max.
  • Norm::One: values is dimension n and contains the local column sum.
  • Norm::Inf: values is dimension m and contains the local row sum.
  • Norm::Fro: values is dimension 2 and contains the local scale and sum-of-squares.

◆ norm1est()

template<typename scalar_t >
void slate::internal::norm1est ( Matrix< scalar_t > &  X,
Matrix< scalar_t > &  V,
Matrix< int64_t > &  isgn,
blas::real_type< scalar_t > *  est,
int *  kase,
std::vector< int64_t > &  isave 
)

Distributed parallel estimate of the 1-norm of a square matrix A.

Generic implementation for any target.

Estimates the 1-norm of a square matrix, using reverse communication for evaluating matrix-vector products.

Template Parameters
scalar_tOne of float, double, std::complex<float>, std::complex<double>.
Parameters
[in,out]XOn entry, the n-by-1 matrix \(X\). On an intermediate return, X should be overwritten by A * X if kase = 1, or by A^H * X if kase = 2.
[in,out]VOn entry, the n-by-1 matrix \(V\). On exit, V = A*W, where est = norm(V) / norm(W) (W is not returned).
[out]isgnisgn is an integer matrix of size n-by-1.
[in,out]estOn entry, with kase = 1 or 2 and isave[0] = 3, est should be unchanged from the previous call to norm1est. On exit, est is an estimate for norm(A).
[in,out]kaseOn the initial call to norm1est, kase should be 0. On an intermediate return, kase will be 1 or 2, indicating whether X should be overwritten by A * X or A^H * X. On exit, kase will again be 0.
[in,out]isaveisave is an integer vector of size 4, used to save variables between calls to norm1est: isave[0] is the step to do in the next iteration; isave[1] is the tile of the maximum element in X; isave[2] is the index of the maximum element in X; isave[3] is the number of iterations.

Note: in LAPACK, norm1est is lacn2.

◆ norm1est_altsgn()

template<typename scalar_t >
void slate::internal::norm1est_altsgn ( Matrix< scalar_t > &  A)

An auxiliary routine that sets the entries of a vector, alternating the signs of the entries.

The vector is stored as a matrix. For each iteration in norm1est, if the new estimate is smaller than the current one, this routine is called to set a new search direction.

◆ permutation_to_sequential_pivot()

template<typename scalar_t >
void slate::internal::permutation_to_sequential_pivot ( std::vector< AuxPivot< scalar_t > > &  aux_pivot,
int64_t  diag_len,
int  mt,
int64_t  mb 
)

Convert pivot rows (i.e., permutation of 0, ..., m-1) to sequence of row-swaps to be applied to a matrix (i.e., LAPACK-style sequential pivots), for m = mt * mb rows.

Parameters
[in,out]aux_pivotOn entry, permutation of 0, ..., m-1 formed by diag_len swaps during panel factorization, in (tile index, offset) format. Actually, only first diag_len entries are accessed. On exit, first diag_len entries are LAPACK-style sequential pivots, in (tile index, offset) format.
[in]diag_lenLength of the diagonal, min( mb, nb ) of diagonal tile.
[in]mtNumber of block-rows in panel.
[in]mbNumber of rows in each tile.

◆ reduce_info()

void slate::internal::reduce_info ( int64_t *  info,
MPI_Comm  mpi_comm 
)

MPI reduce info, used in getrf, hetrf, etc.

Parameters
[in,out]infoOn input, status on each rank; 0 means no error. On output, smallest non-zero info among all MPI ranks, or zero if info is zero on all MPI ranks.
[in]mpi_commMPI communicator.

◆ tile_offsets()

template<typename scalar_t >
std::vector< int64_t > slate::internal::tile_offsets ( RowCol  dim,
slate::BaseMatrix< scalar_t > &  A 
)

Computes the global index for each tile.

Parameters
[in]dimWhether to compute the row or column indices
[in]AThe matrix to get tile sizes from
Returns
a vector mapping tile indices to global indices