SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages

Functions

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::geqrf (Matrix< scalar_t > &&A, Matrix< scalar_t > &&T, std::vector< scalar_t * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 QR factorization of a column of tiles.
 
template<typename scalar_t >
void slate::internal::geqrf (internal::TargetType< Target::HostTask >, Matrix< scalar_t > &A, Matrix< scalar_t > &T, std::vector< scalar_t * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 QR factorization of a column of tiles, HostTask implementation.
 
template<typename scalar_t >
void slate::internal::geqrf (internal::TargetType< Target::HostNest >, Matrix< scalar_t > &A, Matrix< scalar_t > &T, std::vector< scalar_t * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 QR factorization of a column of tiles, HostNest implementation.
 
template<typename scalar_t >
void slate::internal::geqrf (internal::TargetType< Target::HostBatch >, Matrix< scalar_t > &A, Matrix< scalar_t > &T, std::vector< scalar_t * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 QR factorization of a column of tiles, HostBatch implementation.
 
template<typename scalar_t >
void slate::internal::geqrf (internal::TargetType< Target::Devices >, Matrix< scalar_t > &A, Matrix< scalar_t > &T, std::vector< scalar_t * > dwork_array, size_t work_size, int64_t ib, int max_panel_threads, int priority)
 QR factorization of a column of tiles, device implementation.
 
template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::ttmqr (Side side, Op op, Matrix< scalar_t > &&A, Matrix< scalar_t > &&T, Matrix< scalar_t > &&C, int tag)
 Distributed multiply matrix by Q from QR triangle-triangle factorization of column of tiles.
 
template<typename scalar_t >
void slate::internal::ttmqr (internal::TargetType< Target::HostTask >, Side side, Op op, Matrix< scalar_t > &A, Matrix< scalar_t > &T, Matrix< scalar_t > &C, int tag)
 Distributed multiply matrix by Q from QR triangle-triangle factorization of column of tiles, host implementation.
 
template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::ttqrt (Matrix< scalar_t > &&A, Matrix< scalar_t > &&T)
 Distributed QR triangle-triangle factorization of column of tiles.
 
template<typename scalar_t >
void slate::internal::ttqrt (internal::TargetType< Target::HostTask >, Matrix< scalar_t > &A, Matrix< scalar_t > &T)
 Distributed QR triangle-triangle factorization, host implementation.
 
template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::unmqr (Side side, Op op, Matrix< scalar_t > &&V, Matrix< scalar_t > &&T, Matrix< scalar_t > &&C, Matrix< scalar_t > &&W, int priority, int64_t queue_index)
 Multiply matrix by Q from local QR factorization.
 
template<Target target, typename scalar_t >
void slate::internal::unmqr (internal::TargetType< target >, Side side, Op op, Matrix< scalar_t > V, Matrix< scalar_t > &T, Matrix< scalar_t > &C, Matrix< scalar_t > &W, int priority, int64_t queue_index)
 Multiply matrix by Q from local QR factorization.
 

Detailed Description

Function Documentation

◆ geqrf() [1/3]

template<typename scalar_t >
void slate::internal::geqrf ( internal::TargetType< Target::HostBatch >  ,
Matrix< scalar_t > &  A,
Matrix< scalar_t > &  T,
std::vector< scalar_t * >  dwork_array,
size_t  work_size,
int64_t  ib,
int  max_panel_threads,
int  priority 
)

QR factorization of a column of tiles, HostBatch implementation.

Forwards to the HostTask implementation, as there is currently no HostBatch-specific implementation.

◆ geqrf() [2/3]

template<typename scalar_t >
void slate::internal::geqrf ( internal::TargetType< Target::HostNest >  ,
Matrix< scalar_t > &  A,
Matrix< scalar_t > &  T,
std::vector< scalar_t * >  dwork_array,
size_t  work_size,
int64_t  ib,
int  max_panel_threads,
int  priority 
)

QR factorization of a column of tiles, HostNest implementation.

Forwards to the HostTask implementation, as there is currently no HostNest-specific implementation.

◆ geqrf() [3/3]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::geqrf ( Matrix< scalar_t > &&  A,
Matrix< scalar_t > &&  T,
std::vector< scalar_t * >  dwork_array,
size_t  work_size,
int64_t  ib,
int  max_panel_threads,
int  priority 
)

QR factorization of a column of tiles.

Dispatches to target implementations.

◆ ttmqr()

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::ttmqr ( Side  side,
Op  op,
Matrix< scalar_t > &&  A,
Matrix< scalar_t > &&  T,
Matrix< scalar_t > &&  C,
int  tag 
)

Distributed multiply matrix by Q from QR triangle-triangle factorization of column of tiles.

Dispatches to target implementations. Todo: This assumes A and T have already been communicated as needed; however, it necessarily handles communication for C. The tag is used in geqrf to differentiate communication for the look-ahead panel from communication for the rest of the trailing matrix.

◆ ttqrt() [1/2]

template<typename scalar_t >
void slate::internal::ttqrt ( internal::TargetType< Target::HostTask >  ,
Matrix< scalar_t > &  A,
Matrix< scalar_t > &  T 
)

Distributed QR triangle-triangle factorization, host implementation.

Assumes panel tiles reside on host.

◆ ttqrt() [2/2]

template<Target target = Target::HostTask, typename scalar_t >
void slate::internal::ttqrt ( Matrix< scalar_t > &&  A,
Matrix< scalar_t > &&  T 
)

Distributed QR triangle-triangle factorization of column of tiles.

Each rank has one triangular tile, the result of the local geqrf panel factorization. Dispatches to target implementations.

◆ unmqr()

template<Target target, typename scalar_t >
void slate::internal::unmqr ( internal::TargetType< target >  ,
Side  side,
Op  op,
Matrix< scalar_t >  V,
Matrix< scalar_t > &  T,
Matrix< scalar_t > &  C,
Matrix< scalar_t > &  W,
int  priority,
int64_t  queue_index 
)

Multiply matrix by Q from local QR factorization.

C = op(Q) C for side = left, or C = C op(Q) for side = right. Assumes V and T are each a single block-column. Assumes W and C have the same dimensions and distribution. This corresponds to larfb( ..., direct=Forward, storev=Columnwise, ... ). This does not include applying the distributed triangle-triangle reductions.