SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
|
Tile holding an mb-by-nb matrix, with leading dimension (stride). More...
#include <Tile.hh>
Public Member Functions | |
Tile () | |
Create empty tile. | |
Tile (int64_t mb, int64_t nb, scalar_t *A, int64_t lda, int device, TileKind kind, Layout layout=Layout::ColMajor, MOSI_State mosi_state=MOSI::Invalid) | |
Create tile that wraps existing memory buffer. | |
Tile (Tile< scalar_t > src_tile, scalar_t *A, int64_t lda, TileKind kind, MOSI_State mosi_state=MOSI::Invalid) | |
Create tile based on an existing tile and use existing memory buffer. | |
void | copyData (Tile< scalar_t > *dst_tile, blas::Queue &queue, bool async=false) const |
Copies data from this tile to dst_tile. | |
void | copyData (Tile< scalar_t > *dst_tile) const |
Copies this tile's data to dst_tile's data; both tiles are assumed to be on the host. | |
void | send (int dst, MPI_Comm mpi_comm, int tag=0) const |
Sends tile to MPI rank dst. | |
void | isend (int dst, MPI_Comm mpi_comm, int tag, MPI_Request *req) const |
Sends tile to MPI rank dst using immediate mode. | |
void | recv (int src, MPI_Comm mpi_comm, Layout layout, int tag=0) |
Receives tile from MPI rank src. | |
void | irecv (int src, MPI_Comm mpi_comm, Layout layout, int tag, MPI_Request *req) |
Receives tile from MPI rank src using immediate mode. | |
void | bcast (int bcast_root, MPI_Comm mpi_comm) |
Broadcasts tile from MPI rank bcast_root, using given communicator. | |
int64_t | mb () const |
Returns number of rows of op(A), where A is this tile. | |
int64_t | nb () const |
Returns number of cols of op(A), where A is this tile. | |
int64_t | stride () const |
Returns column stride of this tile. | |
void | stride (int64_t in_stride) |
Sets column stride of this tile. | |
scalar_t const * | data () const |
Returns const pointer to data, i.e., A(0,0), where A is this tile. | |
scalar_t * | data () |
Returns pointer to data, i.e., A(0,0), where A is this tile. | |
scalar_t | operator() (int64_t i, int64_t j) const |
Returns element {i, j} of op(A). | |
scalar_t const & | at (int64_t i, int64_t j) const |
Returns a const reference to element {i, j} of op(A). | |
scalar_t & | at (int64_t i, int64_t j) |
Returns a reference to element {i, j} of op(A). | |
int64_t | rowIncrement () const |
Return the number of elements to increment to get to the next element in the row, accounting for row-or-column major layout and transposed tiles. | |
int64_t | colIncrement () const |
Return the number of elements to increment to get to the next element in the column, accounting for row-or-column major layout and transposed tiles. | |
bool | origin () const |
Returns true if this is an origin (local non-workspace) tile. | |
bool | workspace () const |
Returns true if this is a workspace tile. | |
bool | allocated () const |
Returns true if SLATE allocated this tile's memory, false if the user provided the tile's memory, e.g., via a fromScaLAPACK constructor. | |
TileKind | kind () |
Returns the TileKind of this tile. | |
size_t | bytes () const |
Returns number of bytes; but NOT consecutive if stride != mb_. | |
size_t | size () const |
Returns number of elements; but NOT consecutive if stride != mb_. | |
Uplo | uplo () const |
Returns whether op(A) is logically Lower, Upper, or General storage. | |
Uplo | uploLogical () const |
Returns whether op(A) is logically Upper, Lower, or General storage, taking the transposition operation into account. | |
Uplo | uploPhysical () const |
Returns whether A is Upper, Lower, or General storage, ignoring the transposition operation. | |
Uplo | uplo_logical () const |
void | uplo (Uplo uplo) |
Sets upper, lower, or general storage flag. | |
Op | op () const |
Returns transposition operation. | |
void | op (Op op) |
Sets transposition operation. | |
int | device () const |
Returns which host or GPU device tile's data is located on. | |
Layout | layout () const |
Layout | userLayout () const |
void | setLayout (Layout in_layout) |
Sets the tile's layout, updating the stride and front buffer as need be. | |
void | layout (Layout in_layout) |
bool | isContiguous () const |
bool | isUserContiguous () const |
bool | isTransposable () |
Returns whether this tile can safely store its data in transposed form based on its 'TileKind', buffer size, Layout, and stride. | |
void | makeTransposable (scalar_t *data) |
Attaches the data buffer to this tile as an extended buffer, to be used to hold the transposed data of rectangular tiles. Marks the tile as extended. NOTE: does not set the front buffer to be the extended one. NOTE: throws error if not already transposable. | |
void | layoutReset () |
Resets the tile's member fields related to being extended. | |
bool | extended () const |
scalar_t * | extData () |
scalar_t * | userData () |
void | layoutSetFrontDataExt (bool front=true) |
Sets the front buffer of the extended tile, and adjusts stride accordingly. | |
scalar_t * | layoutBackData () |
int64_t | layoutBackStride () const |
void | layoutConvert (scalar_t *work_data=nullptr) |
Convert layout (Column / Row major) of this tile (host CPU implementation). | |
void | layoutConvert (scalar_t *work_data, blas::Queue &queue, bool async=false) |
Convert layout (Column / Row major) of this tile (device GPU implementation). | |
void | layoutConvert (blas::Queue &queue, bool async=false) |
Overload with work_data = nullptr. | |
void | set (scalar_t alpha) |
Set tile data to constant. | |
void | set (scalar_t alpha, scalar_t beta) |
Set tile data to constants. | |
MOSI | state () |
Returns the MOSI status of the tile. | |
bool | stateOn (MOSI_State stateIn) const |
Returns whether the Modified/Shared/Invalid state or the OnHold flag is On. | |
Tile< scalar_t > | slice (Op op, int64_t i, int64_t j, int64_t mb, int64_t nb, Uplo uplo) |
Creates a tile with the same data that slices the view of this tile. | |
Static Public Attributes | |
static constexpr bool | is_complex = slate::is_complex<scalar_t>::value |
static constexpr bool | is_real = ! is_complex |
Protected Member Functions | |
void | state (MOSI_State stateIn) |
Protected Attributes | |
int64_t | mb_ |
int64_t | nb_ |
int64_t | stride_ |
int64_t | user_stride_ |
scalar_t * | data_ |
scalar_t * | user_data_ |
scalar_t * | ext_data_ |
Op | op_ |
Uplo | uplo_ |
TileKind | kind_ |
Layout | layout_ |
layout_: The physical ordering of elements in the data buffer: | |
Layout | user_layout_ |
int | device_ |
MOSI_State | mosi_state_ |
Friends | |
template<typename T > | |
class | BaseMatrix |
template<typename T > | |
class | TileNode |
template<typename T > | |
class | MatrixStorage |
template<typename TileType > | |
TileType | transpose (TileType &A) |
Returns shallow copy of tile that is transposed. | |
template<typename TileType > | |
TileType | conj_transpose (TileType &A) |
Returns shallow copy of tile that is conjugate-transposed. | |
Tile holding an mb-by-nb matrix, with leading dimension (stride).
slate::Tile< scalar_t >::Tile | ( | int64_t | mb, |
int64_t | nb, | ||
scalar_t * | A, | ||
int64_t | lda, | ||
int | device, | ||
TileKind | kind, | ||
Layout | layout = Layout::ColMajor , |
||
MOSI_State | mosi_state = MOSI::Invalid |
||
) |
Create tile that wraps existing memory buffer.
[in] | mb | Number of rows of the tile. mb >= 0. |
[in] | nb | Number of columns of the tile. nb >= 0. |
[in,out] | A | The mb-by-nb tile A, stored in an lda-by-nb array if ColMajor, or lda-by-mb array if RowMajor. |
[in] | lda | Leading dimension of the array A. lda >= mb if ColMajor. lda >= nb if RowMajor. |
[in] | device | Tile's device ID. |
[in] | kind | The kind of tile:
|
[in] | layout | The physical ordering of elements in the data buffer:
|
slate::Tile< scalar_t >::Tile | ( | Tile< scalar_t > | src_tile, |
scalar_t * | A, | ||
int64_t | lda, | ||
TileKind | kind, | ||
MOSI_State | mosi_state = MOSI::Invalid |
||
) |
Create tile based on an existing tile and use existing memory buffer.
[in] | src_tile | Tile to copy metadata from |
[in,out] | A | The mb-by-nb tile A, stored in an lda-by-nb array if ColMajor, or lda-by-mb array if RowMajor. |
[in] | kind | The kind of tile:
|
scalar_t & slate::Tile< scalar_t >::at | ( | int64_t | i, |
int64_t | j | ||
) |
Returns a reference to element {i, j} of op(A).
If op() is ConjTrans, data is NOT conjugated, because a reference is returned. Use operator() to get the actual value, conjugated if need be.
[in] | i | Row index. 0 <= i < mb. |
[in] | j | Column index. 0 <= j < nb. |
scalar_t const & slate::Tile< scalar_t >::at | ( | int64_t | i, |
int64_t | j | ||
) | const |
Returns a const reference to element {i, j} of op(A).
If op() is ConjTrans, data is NOT conjugated, because a reference is returned. Use operator() to get the actual value, conjugated if need be. This takes column-major / row-major layout into account.
[in] | i | Row index. 0 <= i < mb. |
[in] | j | Column index. 0 <= j < nb. |
void slate::Tile< scalar_t >::bcast | ( | int | bcast_root, |
MPI_Comm | mpi_comm | ||
) |
Broadcasts tile from MPI rank bcast_root, using given communicator.
[in] | bcast_root | Root (source) MPI rank in mpi_comm. |
[in] | mpi_comm | MPI communicator. |
void slate::Tile< scalar_t >::copyData | ( | Tile< scalar_t > * | dst_tile | ) | const |
Copies this tile's data to dst_tile's data; both tiles are assumed to be on the host.
Copies data from this tile to dst_tile (host to host implementation).
WARNING: device ID set in device_ of both tiles should be properly set.
[in] | dst_tile | Destination tile. |
void slate::Tile< scalar_t >::copyData | ( | Tile< scalar_t > * | dst_tile, |
blas::Queue & | queue, | ||
bool | async = false |
||
) | const |
Copies data from this tile to dst_tile.
Figures out the direction of copy and the source and destination devices from the destination tile and this tile. WARNING: device ID set in device_ of both tiles should be properly set.
[in] | dst_tile | Destination tile. |
[in] | queue | BLAS++ queue for copy if needed. |
[in] | async | If true, don't synchronize the device queues (asynchronous mode); otherwise synchronize at every device operation. |
|
inline |
|
inline |
void slate::Tile< scalar_t >::irecv | ( | int | src, |
MPI_Comm | mpi_comm, | ||
Layout | layout, | ||
int | tag, | ||
MPI_Request * | request | ||
) |
Receives tile from MPI rank src using immediate mode.
[in] | src | Source MPI rank in mpi_comm. |
[in] | mpi_comm | MPI communicator. |
[in] | layout | Indicates the Layout (ColMajor/RowMajor) of the received data. WARNING: the layout of the origin matrix tile must be set properly afterwards. |
[in] | tag | MPI tag |
[out] | request | MPI request object |
|
inline |
void slate::Tile< scalar_t >::isend | ( | int | dst, |
MPI_Comm | mpi_comm, | ||
int | tag, | ||
MPI_Request * | request | ||
) | const |
Sends tile to MPI rank dst using immediate mode.
[in] | dst | Destination MPI rank in mpi_comm. |
[in] | mpi_comm | MPI communicator. |
[in] | tag | MPI tag |
[out] | request | MPI Request object |
|
inline |
Returns whether this tile can safely store its data in transposed form based on its 'TileKind', buffer size, Layout, and stride.
todo: validate and handle sliced-matrix
|
inline |
|
inline |
|
inline |
void slate::Tile< scalar_t >::layoutConvert | ( | scalar_t * | work_data, |
blas::Queue & | queue, | ||
bool | async = false |
||
) |
Convert layout (Column / Row major) of this tile (device GPU implementation).
Performs:
Tile must be transposable already, should call makeTransposable() if not. A BLAS++ queue should be provided if tile instance is on a device.
[in] | work_data | Pointer to a workspace buffer, needed for out-of-place transpose. |
[in] | queue | BLAS++ queue to run the kernels on the device. |
[in] | async | If true, don't synchronize the device queues (asynchronous mode); otherwise synchronize at every device operation. |
void slate::Tile< scalar_t >::layoutConvert | ( | scalar_t * | work_data = nullptr | ) |
Convert layout (Column / Row major) of this tile (host CPU implementation).
Performs:
Tile must be transposable already, should call makeTransposable() if not.
[in] | work_data | Pointer to a workspace buffer, needed for out-of-place transpose. |
void slate::Tile< scalar_t >::layoutReset |
Resets the tile's member fields related to being extended.
WARNING: should be called within MatrixStorage::tileLayoutReset() only. NOTE: the front buffer should be already swapped to be the user buffer, throws error otherwise.
void slate::Tile< scalar_t >::layoutSetFrontDataExt | ( | bool | front = true | ) |
Sets the front buffer of the extended tile, and adjusts stride accordingly.
NOTE: tile should be already extended, throws error otherwise.
scalar_t slate::Tile< scalar_t >::operator() | ( | int64_t | i, |
int64_t | j | ||
) | const |
void slate::Tile< scalar_t >::recv | ( | int | src, |
MPI_Comm | mpi_comm, | ||
Layout | layout, | ||
int | tag = 0 |
||
) |
Receives tile from MPI rank src.
[in] | src | Source MPI rank in mpi_comm. |
[in] | mpi_comm | MPI communicator. |
[in] | layout | Indicates the Layout (ColMajor/RowMajor) of the received data. |
[in] | tag | MPI tag |
void slate::Tile< scalar_t >::send | ( | int | dst, |
MPI_Comm | mpi_comm, | ||
int | tag = 0 |
||
) | const |
Sends tile to MPI rank dst.
[in] | dst | Destination MPI rank in mpi_comm. |
[in] | mpi_comm | MPI communicator. |
[in] | tag | MPI tag |
void slate::Tile< scalar_t >::set | ( | scalar_t | value | ) |
Set tile data to constant.
[in] | value | Value set on both diagonal and off-diagonal elements. |
void slate::Tile< scalar_t >::set | ( | scalar_t | offdiag_value, |
scalar_t | diag_value | ||
) |
Set tile data to constants.
[in] | offdiag_value | Value set on off-diagonal elements. |
[in] | diag_value | Value set on diagonal elements. |
Tile< scalar_t > slate::Tile< scalar_t >::slice | ( | Op | op, |
int64_t | i, | ||
int64_t | j, | ||
int64_t | mb, | ||
int64_t | nb, | ||
Uplo | uplo | ||
) |
Creates a tile with the same data that slices the view of this tile.
Specifically, offsets the data pointer to op(A)(i, j), where A is this tile, sets the number of rows and columns to mb and nb, respectively, and sets uplo to uplo.
[in] | op | Whether the matrix is transposed or not |
[in] | i | Row offset. 0 <= i <= i+mb <= this->mb. |
[in] | j | Col offset. 0 <= j <= j+nb <= this->nb. |
[in] | mb | Number of rows. 0 <= mb <= this->mb. |
[in] | nb | Number of columns. 0 <= nb <= this->nb. |
[in] | uplo | Upper, lower, or general storage flag |
|
inline |
Returns the MOSI status of the tile.
To check the OnHold flag, use stateOn. Note that this is the MOSI state from when the tile was accessed and may not be up to date with the canonical version.
|
inline |
Uplo slate::Tile< scalar_t >::uploLogical |
Returns whether op(A) is logically Upper, Lower, or General storage, taking the transposition operation into account.
Same as uplo().
Uplo slate::Tile< scalar_t >::uploPhysical |
Returns whether A is Upper, Lower, or General storage, ignoring the transposition operation.
|
inline |
|
protected |
layout_: The physical ordering of elements in the data buffer: