SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
Loading...
Searching...
No Matches

Functions

template<typename scalar_t >
void slate::internal::gerbt (Matrix< scalar_t > A11, Matrix< scalar_t > A12, Matrix< scalar_t > A21, Matrix< scalar_t > A22, Matrix< scalar_t > U1, Matrix< scalar_t > U2, Matrix< scalar_t > V1, Matrix< scalar_t > V2)
 Applies a single butterfly matrix to each side of A.
 
template<typename scalar_t >
void slate::internal::gerbt (Side side, Op trans, Matrix< scalar_t > B1, Matrix< scalar_t > B2, Matrix< scalar_t > U1, Matrix< scalar_t > U2)
 Applies a single butterfly matrix to one side of B.
 
template<typename scalar_t >
void slate::internal::getrf_panel (internal::TargetType< Target::HostTask >, Matrix< scalar_t > &A, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, blas::real_type< scalar_t > pivot_threshold, int max_panel_threads, int priority, int tag, int64_t *info)
 LU factorization of a column of tiles, host implementation.
 
template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::getrf_panel (Matrix< scalar_t > &&A, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, blas::real_type< scalar_t > pivot_threshold, int max_panel_threads, int priority, int tag, int64_t *info)
 LU factorization of a column of tiles.
 
template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::getrf_nopiv (Matrix< scalar_t > &&A, int64_t ib, int priority, int64_t *info)
 LU factorization of a single tile without pivoting.
 
template<typename scalar_t >
void slate::internal::getrf_nopiv (internal::TargetType< Target::HostTask >, Matrix< scalar_t > &A, int64_t ib, int priority, int64_t *info)
 LU factorization of a single tile without pivoting, host implementation.
 
template<typename scalar_t >
void slate::internal::getrf_tntpiv_local (internal::TargetType< Target::HostTask >, std::vector< Tile< scalar_t > > &tiles, std::vector< char * > dwork_array, size_t dwork_bytes, int mlocal, int device, lapack::Queue *queue, int64_t diag_len, int64_t ib, int stage, int64_t mb, int64_t nb, std::vector< int64_t > &tile_indices, std::vector< std::vector< AuxPivot< scalar_t > > > &aux_pivot, int mpi_rank, int max_panel_threads, int priority, int64_t *info)
 Multi-threaded LU factorization of local tiles.
 
template<Target target, typename scalar_t >
void slate::internal::getrf_tntpiv_panel (internal::TargetType< target >, Matrix< scalar_t > &A, Matrix< scalar_t > &Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 LU factorization of a column of tiles.
 
template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::getrf_tntpiv_panel (Matrix< scalar_t > &&A, Matrix< scalar_t > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
 LU factorization of a column of tiles.
 
template<typename scalar_t >
void slate::internal::rbt_fill (Matrix< scalar_t > &U, const int64_t seed)
 Allocates and fills a random butterfly transform in packed storage.
 
template<typename scalar_t >
std::pair< Matrix< scalar_t >, Matrix< scalar_t > > slate::internal::rbt_generate (const Matrix< scalar_t > &A, const int64_t d, const int64_t seed)
 Constructs two random butterfly matrices in packed storage to transform the given matrix.
 

Detailed Description

Function Documentation

◆ gerbt() [1/2]

template<typename scalar_t >
void slate::internal::gerbt ( Matrix< scalar_t >  A11,
Matrix< scalar_t >  A12,
Matrix< scalar_t >  A21,
Matrix< scalar_t >  A22,
Matrix< scalar_t >  U1,
Matrix< scalar_t >  U2,
Matrix< scalar_t >  V1,
Matrix< scalar_t >  V2 
)

Applies a single butterfly matrix to each side of A.

The matrices are divided into submatrices along the halves of the butterfly matrices.

◆ gerbt() [2/2]

template<typename scalar_t >
void slate::internal::gerbt ( Side  side,
Op  trans,
Matrix< scalar_t >  B1,
Matrix< scalar_t >  B2,
Matrix< scalar_t >  U1,
Matrix< scalar_t >  U2 
)

Applies a single butterfly matrix to one side of B.

The matrices are divided into submatrices along the half of the butterfly matrix.

◆ getrf_nopiv() [1/2]

template<typename scalar_t >
void slate::internal::getrf_nopiv ( internal::TargetType< Target::HostTask >,
Matrix< scalar_t > &  A,
int64_t  ib,
int  priority,
int64_t *  info 
)

LU factorization of a single tile without pivoting, host implementation.

Parameters
[in,out] info: Exit status.
  • 0: successful exit
  • i > 0: U(i,i) is exactly zero (1-based index). The factorization will have NaN due to division by zero.

◆ getrf_nopiv() [2/2]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::getrf_nopiv ( Matrix< scalar_t > &&  A,
int64_t  ib,
int  priority,
int64_t *  info 
)

LU factorization of a single tile without pivoting.

Dispatches to target implementations.

◆ getrf_panel()

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::getrf_panel ( Matrix< scalar_t > &&  A,
int64_t  diag_len,
int64_t  ib,
std::vector< Pivot > &  pivot,
blas::real_type< scalar_t >  pivot_threshold,
int  max_panel_threads,
int  priority,
int  tag,
int64_t *  info 
)

LU factorization of a column of tiles.

Dispatches to target implementations.

◆ getrf_tntpiv_local()

template<typename scalar_t >
void slate::internal::getrf_tntpiv_local ( internal::TargetType< Target::HostTask >,
std::vector< Tile< scalar_t > > &  tiles,
std::vector< char * >  dwork_array,
size_t  dwork_bytes,
int  mlocal,
int  device,
lapack::Queue *  queue,
int64_t  diag_len,
int64_t  ib,
int  stage,
int64_t  mb,
int64_t  nb,
std::vector< int64_t > &  tile_indices,
std::vector< std::vector< AuxPivot< scalar_t > > > &  aux_pivot,
int  mpi_rank,
int  max_panel_threads,
int  priority,
int64_t *  info 
)

Multi-threaded LU factorization of local tiles.

Parameters
[in] target: Target for dispatch to correct implementation.
[in,out] tiles: List of tiles to factor on the CPU.
[in,out] dwork_array: Array of GPU device workspaces, dimension (num_devices). dwork_array[ dev ] stores dA, dwork, dipiv, and dinfo on GPU dev; dA is contiguous copy of tiles on GPU, dwork is getrf workspace, dipiv is pivot vector, dinfo is getrf return value.
[in] dwork_bytes: Total size of dwork_array[ dev ] in bytes for each GPU device.
[in] mlocal: Number of rows in dwork_array.
[in] device: Device performing factorization, needed for pointing to correct memory in dwork_array. Device == HostNum for CPU implementation.
[in] queue: Queue associated to input device.
[in] diag_len: Length of diagonal, min( mb, nb ) of diagonal tile.
[in] ib: Inner blocking.
[in] stage: Stage = 0 is initial local tiles, stage = 1 is subsequent tournament.
[in] mb: Tile row block size.
[in] nb: Tile column block size.
[in] tile_indices: Block row indices of tiles in tiles array.
[in] mpi_rank: MPI rank of this process.
[in] max_panel_threads: Maximum number of threads to launch for local panel.
[in] priority: OpenMP priority. todo: unused. Should it be on taskloop?

◆ getrf_tntpiv_panel()

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::getrf_tntpiv_panel ( Matrix< scalar_t > &&  A,
Matrix< scalar_t > &&  Awork,
std::vector< char * >  dwork_array,
size_t  work_bytes,
int64_t  diag_len,
int64_t  ib,
std::vector< Pivot > &  pivot,
int  max_panel_threads,
int  priority,
int64_t *  info 
)

LU factorization of a column of tiles.

Dispatches to target implementations.

◆ rbt_fill()

template<typename scalar_t >
void slate::internal::rbt_fill ( Matrix< scalar_t > &  U,
const int64_t  seed 
)

Allocates and fills a random butterfly transform in packed storage.

The depth is computed based on the number of columns in U.

◆ rbt_generate()

template<typename scalar_t >
std::pair< Matrix< scalar_t >, Matrix< scalar_t > > slate::internal::rbt_generate ( const Matrix< scalar_t > &  A,
const int64_t  d,
const int64_t  seed 
)

Constructs two random butterfly matrices in packed storage to transform the given matrix.

Parameters
[in] A: The matrix to be transformed
[in] d: The depth of the transform
[in] seed: A seed for controlling the random number generation
Returns
a pair containing the left and right transforms