SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
Loading...
Searching...
No Matches
slate::BaseBandMatrix< scalar_t > Class Template Reference

Base class for all SLATE distributed, tiled banded storage matrices. More...

#include <BaseBandMatrix.hh>

Inheritance diagram for slate::BaseBandMatrix< scalar_t >:
slate::BaseMatrix< scalar_t > slate::BandMatrix< scalar_t > slate::BaseTriangularBandMatrix< scalar_t > slate::HermitianBandMatrix< scalar_t > slate::TriangularBandMatrix< scalar_t >

Public Types

using ij_tuple = std::tuple< int64_t, int64_t >
 
- Public Types inherited from slate::BaseMatrix< scalar_t >
using BcastList = std::vector< std::tuple< int64_t, int64_t, std::list< BaseMatrix< scalar_t > > > >
 
using BcastListTag = std::vector< std::tuple< int64_t, int64_t, std::list< BaseMatrix< scalar_t > >, int64_t > >
 
using ReduceList = std::vector< std::tuple< int64_t, int64_t, BaseMatrix< scalar_t >, std::list< BaseMatrix< scalar_t > > > >
 
using ij_tuple = typename MatrixStorage< scalar_t >::ij_tuple
 
typedef scalar_t value_type
 

Public Member Functions

int64_t getMaxDeviceTiles (int device)
 Returns number of local tiles of the matrix on this rank and given device.
 
int64_t getMaxDeviceTiles ()
 Returns number of local tiles of the matrix on this rank and given device.
 
void allocateBatchArrays (int64_t batch_size=0, int64_t num_arrays=1)
 Allocates batch arrays and BLAS++ queues for all devices.
 
void reserveDeviceWorkspace ()
 Reserve space for temporary workspace tiles on all GPU devices.
 
Matrix< scalar_t > sub (int64_t i1, int64_t i2, int64_t j1, int64_t j2)
 Returns sub-matrix that is a shallow copy view of the parent matrix, A[ i1:i2, j1:j2 ].
 
void tileUpdateAllOrigin ()
 Move all tiles back to their origin.
 
- Public Member Functions inherited from slate::BaseMatrix< scalar_t >
template<typename out_scalar_t >
BaseMatrix< out_scalar_t > baseEmptyLike (int64_t mb, int64_t nb, Op deepOp)
 [internal]
 
Tile< scalar_t > operator() (int64_t i, int64_t j, int device=HostNum)
 Get shallow copy of tile {i, j} of op(A) on given device, with the tile's op flag set to match the matrix's.
 
Tile< scalar_t > at (int64_t i, int64_t j, int device=HostNum)
 Alias of operator().
 
int num_devices () const
 Returns number of devices (per MPI process) to distribute matrix to.
 
void gridinfo (GridOrder *order, int *nprow, int *npcol, int *myrow, int *mycol)
 Get nprow, npcol, myrow, mycol for 2D block cyclic (2DBC) distribution.
 
std::function< int64_t(int64_t i)> tileMbFunc () const
 Returns tileMb function.
 
std::function< int64_t(int64_t j)> tileNbFunc () const
 Returns tileNb function.
 
std::function< int(ij_tuple ij)> tileRankFunc () const
 Returns tileRank function.
 
std::function< int(ij_tuple ij)> tileDeviceFunc () const
 Returns tileDevice function.
 
int64_t m () const
 Returns number of rows in op(A).
 
int64_t n () const
 Returns number of columns in op(A).
 
int64_t mt () const
 Returns number of block rows in op(A).
 
int64_t nt () const
 Returns number of block cols in op(A).
 
Op op () const
 Returns transposition operation op(A) as NoTrans, Trans, or ConjTrans.
 
bool tileExists (int64_t i, int64_t j, int device=HostNum)
 returns true if tile exists on specified device
 
int tileRank (int64_t i, int64_t j) const
 Returns MPI rank of tile {i, j} of op(A).
 
int tileDevice (int64_t i, int64_t j) const
 Returns device of tile {i, j} of op(A).
 
bool tileIsLocal (int64_t i, int64_t j) const
 Returns whether tile {i, j} of op(A) is local.
 
Uplo uplo () const
 Returns whether op(A) is logically Lower, Upper, or General.
 
Uplo uploLogical () const
 Returns whether op(A) is logically Lower, Upper, or General storage, taking the transposition operation into account.
 
Uplo uploPhysical () const
 Returns whether A is physically Lower, Upper, or General storage, ignoring the transposition operation.
 
Tile< scalar_t > originTile (int64_t i, int64_t j)
 Returns the origin tile instance of tile(i, j)
 
Target origin () const
 Tile origin.
 
int64_t tileMb (int64_t i) const
 Returns number of rows (mb) in block row i of op(A).
 
int64_t tileNb (int64_t j) const
 Returns number of cols (nb) in block col j of op(A).
 
Tile< scalar_t > tileInsert (int64_t i, int64_t j, int device=HostNum)
 Insert tile {i, j} of op(A) and allocate its data.
 
Tile< scalar_t > tileInsert (int64_t i, int64_t j, int device, scalar_t *A, int64_t ld)
 Insert tile {i, j} of op(A) with existing data.
 
Tile< scalar_t > tileInsert (int64_t i, int64_t j, scalar_t *A, int64_t ld)
 Insert tile with default device=HostNum.
 
Tile< scalar_t > tileInsertWorkspace (int64_t i, int64_t j, int device, Layout layout)
 Insert a workspace tile {i, j} of op(A) and allocate its data.
 
Tile< scalar_t > tileInsertWorkspace (int64_t i, int64_t j, int device)
 
Tile< scalar_t > tileInsertWorkspace (int64_t i, int64_t j, Layout layout)
 
Tile< scalar_t > tileInsertWorkspace (int64_t i, int64_t j)
 
scalar_t * allocWorkspaceBuffer (int device, int64_t size)
 Allocates a workspace buffer using the matrix's memory pool.
 
void freeWorkspaceBuffer (int device, scalar_t *buffer)
 Frees a workspace buffer allocated with BaseMatrix::allocWorkspaceBuffer.
 
MOSI tileState (int64_t i, int64_t j, int device=HostNum)
 Returns tile(i, j)'s state on device (defaults to host).
 
bool tileOnHold (int64_t i, int64_t j, int device=HostNum)
 Returns whether tile(i, j) is OnHold on device (defaults to host).
 
void tileUnsetHold (int64_t i, int64_t j, int device=HostNum)
 Unsets the hold of tile(i, j) on device (defaults to host) if it was OnHold.
 
void tileUnsetHoldAll (int device=HostNum)
 Unsets all local tiles' hold on device.
 
void tileUnsetHoldAllOnDevices ()
 Unsets all local tiles' hold on all devices.
 
void tileModified (int64_t i, int64_t j, int device=HostNum, bool permissive=false)
 Marks tile(i, j) as Modified on device.
 
void tileAcquire (int64_t i, int64_t j, int device, Layout layout)
 Acquire tile(i, j) on device without copying data if not already exists.
 
void tileAcquire (int64_t i, int64_t j, Layout layout)
 
void tileGetForReading (int64_t i, int64_t j, int device, LayoutConvert layout)
 Gets tile(i, j) for reading on device.
 
void tileGetForReading (std::set< ij_tuple > &tile_set, int device, LayoutConvert layout)
 Gets a set of tiles for reading on device.
 
void tileGetForReading (int64_t i, int64_t j, LayoutConvert layout)
 Gets tile(i, j) for reading on host.
 
void tileGetForReading (std::set< ij_tuple > &tile_set, LayoutConvert layout)
 Gets a set of tiles for reading on host.
 
void tileGetForReading (std::set< ij_tuple > &tile_set, LayoutConvert layout, int from_device)
 
void tileGetAllForReading (int device, LayoutConvert layout)
 Gets all local tiles for reading on device.
 
void tileGetAllForReadingOnDevices (LayoutConvert layout)
 Gets all local tiles for reading on corresponding devices.
 
void tileGetForWriting (int64_t i, int64_t j, int device, LayoutConvert layout)
 Gets tile(i, j) for writing on device.
 
void tileGetForWriting (std::set< ij_tuple > &tile_set, int device, LayoutConvert layout)
 Gets a set of tiles for writing on device.
 
void tileGetForWriting (int64_t i, int64_t j, LayoutConvert layout)
 Gets tile(i, j) for writing on host.
 
void tileGetForWriting (std::set< ij_tuple > &tile_set, LayoutConvert layout)
 Gets a set of tiles for writing on host.
 
void tileGetAllForWriting (int device, LayoutConvert layout)
 Gets all local tiles for writing on device.
 
void tileGetAllForWritingOnDevices (LayoutConvert layout)
 Gets all local tiles for writing on corresponding devices.
 
void tileGetAndHold (int64_t i, int64_t j, int device, LayoutConvert layout)
 Gets tile(i, j) on device for reading and marks it as MOSI::OnHold.
 
void tileGetAndHold (int64_t i, int64_t j, LayoutConvert layout)
 Gets tile(i, j) on host for reading and marks it as MOSI::OnHold.
 
void tileGetAndHold (std::set< ij_tuple > &tile_set, int device, LayoutConvert layout)
 Gets a set of tiles for reading on device and marks them as MOSI::OnHold.
 
void tileGetAndHold (std::set< ij_tuple > &tile_set, LayoutConvert layout)
 Gets a set of tiles for reading on host and marks them as MOSI::OnHold.
 
void tileGetAndHoldAll (int device, LayoutConvert layout)
 Gets all local tiles on device and marks them as MOSI::OnHold.
 
void tileGetAndHoldAllOnDevices (LayoutConvert layout)
 Gets all local tiles on corresponding devices and marks them as MOSI::OnHold.
 
Tile< scalar_t > tileUpdateOrigin (int64_t i, int64_t j)
 Updates the origin instance of tile(i, j) if MOSI::Invalid tile must be local.
 
void tileUpdateAllOrigin ()
 Updates all origin instances of local tiles if MOSI::Invalid.
 
int64_t tileLife (int64_t i, int64_t j) const
 Returns life counter of tile {i, j} of op(A).
 
void tileLife (int64_t i, int64_t j, int64_t life)
 Set life counter of tile {i, j} of op(A).
 
void tileTick (int64_t i, int64_t j)
 Decrements life counter of workspace tile {i, j} of op(A).
 
int64_t tileReceiveCount (int64_t i, int64_t j) const
 Returns how many times the tile {i, j} is received through MPI.
 
void tileIncrementReceiveCount (int64_t i, int64_t j)
 Increments the number of times the tile {i, j} is received through MPI.
 
void tileDecrementReceiveCount (int64_t i, int64_t j, int64_t release_count=1)
 Decrements the number of times the tile {i, j} is received through MPI.
 
void tileErase (int64_t i, int64_t j, int device=HostNum)
 Erase tile {i, j} of op(A) on device (host, one device or all devices).
 
void tileRelease (int64_t i, int64_t j, int device=HostNum)
 Erase the tile {i, j}'s instance on device if it is a workspace tile with no hold set on it.
 
void tileSend (int64_t i, int64_t j, int dst_rank, int tag=0)
 Send tile {i, j} of op(A) to the given MPI rank.
 
template<Target target>
void tileSend (int64_t i, int64_t j, int dst_rank, int tag=0)
 
void tileIsend (int64_t i, int64_t j, int dst_rank, int tag, MPI_Request *request)
 Immediately send tile {i, j} of op(A) to the given MPI rank.
 
template<Target target = Target::Host>
void tileRecv (int64_t i, int64_t j, int dst_rank, Layout layout, int tag=0)
 Receive tile {i, j} of op(A) to the given MPI rank.
 
template<Target target = Target::Host>
void tileIrecv (int64_t i, int64_t j, int dst_rank, Layout layout, int tag, MPI_Request *request)
 Receive tile {i, j} of op(A) to the given MPI rank using immediate mode.
 
template<Target target = Target::Host>
void tileBcast (int64_t i, int64_t j, BaseMatrix const &B, Layout layout, int tag=0)
 Send tile {i, j} of op(A) to all MPI ranks in matrix B.
 
template<Target target = Target::Host>
void tileBcast (int64_t i, int64_t j, BaseMatrix const &B, Layout layout, int tag, int64_t life_factor)
 
template<Target target = Target::Host>
void listBcast (BcastList &bcast_list, Layout layout, int tag=0, bool is_shared=false)
 Send tile {i, j} of op(A) to all MPI ranks in the list of submatrices bcast_list.
 
template<Target target = Target::Host>
void listBcast (BcastList &bcast_list, Layout layout, int tag, int64_t life_factor, bool is_shared=false)
 
template<Target target = Target::Host>
void listBcastMT (BcastListTag &bcast_list, Layout layout, bool is_shared=false)
 Send tile {i, j} of op(A) to all MPI ranks in the list of submatrices bcast_list (using OpenMP taskloop and multi-threaded MPI).
 
template<Target target = Target::Host>
void listBcastMT (BcastListTag &bcast_list, Layout layout, int64_t life_factor, bool is_shared=false)
 
template<Target target = Target::Host>
void listReduce (ReduceList &reduce_list, Layout layout, int tag=0)
 
Layout layout () const
 Returns matrix layout flag.
 
Layout tileLayout (int64_t i, int64_t j, int device=HostNum)
 Returns Layout of tile(i, j, device)
 
void tileLayout (int64_t i, int64_t j, int device, Layout layout)
 Sets Layout of tile(i, j, device)
 
void tileLayout (int64_t i, int64_t j, Layout layout)
 Sets Layout of tile(i, j, host)
 
bool tileLayoutIsConvertible (int64_t i, int64_t j, int device=HostNum)
 Returns whether tile(i, j, device) can be safely transposed.
 
void tileLayoutConvert (int64_t i, int64_t j, int device, Layout layout, bool reset=false, bool async=false)
 Converts tile(i, j, device) into 'layout'.
 
void tileLayoutConvert (int64_t i, int64_t j, Layout layout, bool reset=false, bool async=false)
 Convert layout of tile(i, j) to layout on host, optionally reset.
 
void tileLayoutConvert (std::set< ij_tuple > &tile_set, int device, Layout layout, bool reset=false)
 Converts tiles indicated in 'tile_set' that exist on 'device' into 'layout' if not already in 'layout' major.
 
void tileLayoutConvert (std::set< ij_tuple > &tile_set, Layout layout, bool reset=false)
 Convert layout of a set of tiles to layout on host, optionally reset.
 
void tileLayoutConvert (int device, Layout layout, bool reset=false)
 Converts all existing tile instances on 'device' into 'layout' Operates in batch mode.
 
void tileLayoutConvertOnDevices (Layout layout, bool reset=false)
 Converts all existing tile instances on available devices into 'layout'.
 
void tileLayoutReset (int64_t i, int64_t j, int device, Layout layout)
 Converts tile(i, j) into current layout and resets its extended buffer.
 
void tileLayoutReset (int64_t i, int64_t j, Layout layout)
 
void tileLayoutReset (std::set< ij_tuple > &tile_set, int device, Layout layout)
 Converts set of tiles into current layout and resets their extended buffers.
 
void tileLayoutReset (std::set< ij_tuple > &tile_set, Layout layout)
 
void tileLayoutReset ()
 Converts all origin tiles into current matrix-layout.
 
void tileReduceFromSet (int64_t i, int64_t j, int root_rank, std::set< int > &reduce_set, int radix, int tag, Layout layout)
 [internal] WARNING: Sent and received tiles are converted to 'layout' major.
 
void getRanks (std::set< int > *bcast_set) const
 Puts all MPI ranks that have tiles in the matrix into the set.
 
void getLocalDevices (std::set< int > *dev_set) const
 Puts all devices that have local tiles in the matrix into the set.
 
int64_t numLocalTiles () const
 Returns number of local tiles in this matrix.
 
MPI_Comm mpiComm () const
 
int mpiRank () const
 
MPI_Group mpiGroup () const
 
void clear ()
 Removes all tiles from matrix.
 
void releaseWorkspace ()
 Clears all workspace tiles that are not OnHold.
 
void releaseLocalWorkspaceTile (int64_t i, int64_t j)
 Erases a given local workspace tile, if not on hold or modified.
 
void releaseLocalWorkspace ()
 Erases all local workspace tiles, if not on hold or modified.
 
void releaseLocalWorkspace (std::set< ij_tuple > &tile_set)
 Erases a given set of local workspace tiles from all devices including host, if not on hold or modified.
 
void releaseRemoteWorkspaceTile (int64_t i, int64_t j, int64_t release_count=1)
 Erases a given tile that is not local to node from all devices including host, if not on hold or modified.
 
void releaseRemoteWorkspace (int64_t receive_count=1)
 Erases tiles that are not local to node from all devices including host, if not on hold or modified.
 
void releaseRemoteWorkspace (std::set< ij_tuple > &tile_set, int64_t release_count=1)
 Erases a given set of tiles that are not local to node from all devices including host, if not on hold or modified.
 
void clearWorkspace ()
 Removes all temporary host and device workspace tiles from matrix.
 
void allocateBatchArrays (int64_t batch_size, int64_t num_arrays)
 Allocates batch arrays and BLAS++ queues for all devices.
 
void clearBatchArrays ()
 Removes batch arrays from matrix for all devices.
 
int64_t batchArraySize ()
 
scalar_t ** array_host (int device, int64_t batch_arrays_index=0)
 
scalar_t ** array_device (int device, int64_t batch_arrays_index=0)
 
lapack::Queue * comm_queue (int device)
 
lapack::Queue * compute_queue (int device, int queue_index=0)
 
int numComputeQueues ()
 

Protected Member Functions

 BaseBandMatrix ()
 Default constructor creates an empty band matrix with bandwidth = 0.
 
 BaseBandMatrix (int64_t m, int64_t n, int64_t kl, int64_t ku, std::function< int64_t(int64_t j)> &inTileNb, std::function< int(ij_tuple ij)> &inTileRank, std::function< int(ij_tuple ij)> &inTileDevice, MPI_Comm mpi_comm)
 Constructor creates an m-by-n matrix, with no tiles allocated, where tileMb, tileNb, tileRank, tileDevice are given as functions.
 
 BaseBandMatrix (int64_t m, int64_t n, int64_t kl, int64_t ku, int64_t nb, int p, int q, MPI_Comm mpi_comm)
 Constructor creates an m-by-n band matrix, with no tiles allocated, with fixed nb-by-nb tile size and 2D block cyclic distribution.
 
 BaseBandMatrix (int64_t kl, int64_t ku, BaseMatrix< scalar_t > &orig)
 Conversion from general matrix creates shallow copy view of the band region [kl, ku] of the original matrix.
 
 BaseBandMatrix (BaseBandMatrix< scalar_t > &orig, typename BaseMatrix< scalar_t >::Slice slice)
 Sliced matrix constructor creates shallow copy view of parent matrix, A[ row1:row2, col1:col2 ].
 
- Protected Member Functions inherited from slate::BaseMatrix< scalar_t >
 BaseMatrix ()
 [internal] Default constructor creates an empty matrix.
 
 BaseMatrix (int64_t m, int64_t n, std::function< int64_t(int64_t i)> &inTileMb, std::function< int64_t(int64_t j)> &inTileNb, std::function< int(ij_tuple ij)> &inTileRank, std::function< int(ij_tuple ij)> &inTileDevice, MPI_Comm mpi_comm)
 [internal] Construct matrix with mt block rows and nt block columns, such that sum_{i = 0}^{mt-1} tileMb(i) >= m, sum_{j = 0}^{nt-1} tileNb(j) >= n, where tileMb, tileNb, tileRank, tileDevice are given as functions.
 
 BaseMatrix (int64_t m, int64_t n, int64_t mb, int64_t nb, GridOrder order, int nprow, int npcol, MPI_Comm mpi_comm)
 [internal] Construct matrix with mt = ceil( m / mb ) block rows and nt = ceil( n / nb ) block columns, with fixed mb-by-nb tile size and 2D block cyclic distribution.
 
 BaseMatrix (int64_t m, int64_t n, int64_t mb, int64_t nb, int nprow, int npcol, MPI_Comm mpi_comm)
 With order = Col.
 
 BaseMatrix (int64_t m, int64_t n, int64_t nb, int nprow, int npcol, MPI_Comm mpi_comm)
 With mb = nb, order = Col.
 
 BaseMatrix (BaseMatrix &orig, int64_t i1, int64_t i2, int64_t j1, int64_t j2)
 [internal] Sub-matrix constructor creates shallow copy view of parent matrix, B[ i1:i2, j1:j2 ].
 
 BaseMatrix (BaseMatrix &orig, Slice slice)
 [internal] Sliced matrix constructor creates shallow copy view of parent matrix, B[ row1:row2, col1:col2 ].
 
void tileBcastToSet (int64_t i, int64_t j, std::set< int > const &bcast_set)
 
void tileBcastToSet (int64_t i, int64_t j, std::set< int > const &bcast_set, int radix, int tag, Layout layout, Target target)
 [internal] Broadcast tile {i, j} to all MPI ranks in the bcast_set.
 
void tileIbcastToSet (int64_t i, int64_t j, std::set< int > const &bcast_set, int radix, int tag, Layout layout, std::vector< MPI_Request > &send_requests, Target target)
 [internal] Broadcast tile {i, j} to all MPI ranks in the bcast_set.
 
std::tuple< int64_t, int64_t > globalIndex (int64_t i, int64_t j) const
 [internal] Returns index {i, j} in global matrix as a tuple, taking into account the local offset and transpose.
 
std::tuple< int64_t, int64_t, int > globalIndex (int64_t i, int64_t j, int device) const
 [internal] Returns index {i, j, dev} in global matrix as a tuple, taking into account the local offset and transpose.
 
int64_t row0_offset () const
 row offset of first block row.
 
int64_t col0_offset () const
 col offset of first block col.
 
int64_t last_mb () const
 rows in last block row.
 
int64_t last_nb () const
 cols in last block col.
 
int64_t ioffset () const
 block row offset with respect to original matrix
 
int64_t joffset () const
 block col offset with respect to original matrix
 

Protected Attributes

int64_t kl_
 
int64_t ku_
 
- Protected Attributes inherited from slate::BaseMatrix< scalar_t >
Uplo uplo_
 upper or lower storage
 
Op op_
 transpose operation with respect to original matrix
 
Layout layout_
 intended layout of the matrix. defaults to ColMajor.
 
Target origin_
 Tile origins. defaults to Host.
 
std::shared_ptr< MatrixStorage< scalar_t > > storage_
 shared storage of tiles and buffers
 
MPI_Comm mpi_comm_
 
MPI_Group mpi_group_
 
int mpi_rank_
 

Friends

template<typename T >
void swap (BaseBandMatrix< T > &A, BaseBandMatrix< T > &B)
 

Additional Inherited Members

- Static Public Attributes inherited from slate::BaseMatrix< scalar_t >
static constexpr bool is_complex = slate::is_complex<scalar_t>::value
 
static constexpr bool is_real = ! is_complex
 

Detailed Description

template<typename scalar_t>
class slate::BaseBandMatrix< scalar_t >

Base class for all SLATE distributed, tiled banded storage matrices.

Constructor & Destructor Documentation

◆ BaseBandMatrix() [1/4]

template<typename scalar_t >
slate::BaseBandMatrix< scalar_t >::BaseBandMatrix ( int64_t  m,
int64_t  n,
int64_t  kl,
int64_t  ku,
std::function< int64_t(int64_t j)> &  inTileNb,
std::function< int(ij_tuple ij)> &  inTileRank,
std::function< int(ij_tuple ij)> &  inTileDevice,
MPI_Comm  mpi_comm 
)
protected

Constructor creates an m-by-n matrix, with no tiles allocated, where tileMb, tileNb, tileRank, tileDevice are given as functions.

Tiles can be added with tileInsert().

◆ BaseBandMatrix() [2/4]

template<typename scalar_t >
slate::BaseBandMatrix< scalar_t >::BaseBandMatrix ( int64_t  m,
int64_t  n,
int64_t  kl,
int64_t  ku,
int64_t  nb,
int  p,
int  q,
MPI_Comm  mpi_comm 
)
protected

Constructor creates an m-by-n band matrix, with no tiles allocated, with fixed nb-by-nb tile size and 2D block cyclic distribution.

Tiles can be added with tileInsert().

Parameters
[in]mNumber of rows of the matrix. m >= 0.
[in]nNumber of columns of the matrix. n >= 0.
[in]klNumber of subdiagonals within band. kl >= 0.
[in]kuNumber of superdiagonals within band. ku >= 0.
[in]nbBlock size in 2D block-cyclic distribution.
[in]pNumber of block rows in 2D block-cyclic distribution. p > 0.
[in]qNumber of block columns of 2D block-cyclic distribution. q > 0.
[in]mpi_commMPI communicator to distribute matrix across. p*q == MPI_Comm_size(mpi_comm).

◆ BaseBandMatrix() [3/4]

template<typename scalar_t >
slate::BaseBandMatrix< scalar_t >::BaseBandMatrix ( int64_t  kl,
int64_t  ku,
BaseMatrix< scalar_t > &  orig 
)
protected

Conversion from general matrix creates shallow copy view of the band region [kl, ku] of the original matrix.

Parameters
[in]klNumber of subdiagonals within band. kl >= 0.
[in]kuNumber of superdiagonals within band. ku >= 0.
[in]origOriginal matrix of which to make sub-matrix.

◆ BaseBandMatrix() [4/4]

template<typename scalar_t >
slate::BaseBandMatrix< scalar_t >::BaseBandMatrix ( BaseBandMatrix< scalar_t > &  orig,
typename BaseMatrix< scalar_t >::Slice  slice 
)
protected

Sliced matrix constructor creates shallow copy view of parent matrix, A[ row1:row2, col1:col2 ].

This takes row & col indices instead of block row & block col indices.

Parameters
[in]origOriginal matrix of which to make sub-matrix.
[in]sliceContains start and end row and column indices.

Member Function Documentation

◆ allocateBatchArrays()

template<typename scalar_t >
void slate::BaseBandMatrix< scalar_t >::allocateBatchArrays ( int64_t  batch_size = 0,
int64_t  num_arrays = 1 
)

Allocates batch arrays and BLAS++ queues for all devices.

This overrides BaseMatrix::allocateBatchArrays to use the number of local tiles inside the band.

Parameters
[in]batch_sizeAllocate batch arrays as needed so that size of each batch array >= batch_size >= 0. If batch_size = 0 (default), uses batch_size = getMaxDeviceTiles.
[in]num_arraysAllocate batch arrays as needed so that number of batch arrays per device >= num_arrays >= 1.

◆ sub()

template<typename scalar_t >
Matrix< scalar_t > slate::BaseBandMatrix< scalar_t >::sub ( int64_t  i1,
int64_t  i2,
int64_t  j1,
int64_t  j2 
)

Returns sub-matrix that is a shallow copy view of the parent matrix, A[ i1:i2, j1:j2 ].

Parameters
[in]i1Starting block-row index. 0 <= i1 < mt.
[in]i2Ending block-row index (inclusive). i2 < mt.
[in]j1Starting block-column index. 0 <= j1 < nt.
[in]j2Ending block-column index (inclusive). j2 < nt.

The documentation for this class was generated from the following file: