SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
|
slate::MatrixStorage class: used to store the map of distributed tiles. More...
#include <MatrixStorage.hh>
Public Types | |
typedef Tile< scalar_t > | Tile_t |
typedef TileNode< scalar_t > | TileNode_t |
using | ijdev_tuple = std::tuple< int64_t, int64_t, int > |
using | ij_tuple = std::tuple< int64_t, int64_t > |
using | TilesMap = std::map< ij_tuple, std::shared_ptr< TileNode_t > > |
Public Member Functions | |
MatrixStorage (int64_t m, int64_t n, int64_t mb, int64_t nb, GridOrder order, int p, int q, MPI_Comm mpi_comm) | |
MatrixStorage (int64_t mt, int64_t nt, std::function< int64_t(int64_t i)> &inTileMb, std::function< int64_t(int64_t j)> &inTileNb, std::function< int(ij_tuple ij)> &inTileRank, std::function< int(ij_tuple ij)> &inTileDevice, MPI_Comm mpi_comm) | |
~MatrixStorage () | |
Destructor deletes all tiles and frees workspace buffers. | |
MatrixStorage (MatrixStorage &orig)=delete | |
MatrixStorage (MatrixStorage &&orig)=delete | |
MatrixStorage & | operator= (MatrixStorage &orig)=delete |
MatrixStorage & | operator= (MatrixStorage &&orig)=delete |
lapack::Queue * | comm_queue (int device) |
lapack::Queue * | compute_queue (int device, int queue_index) |
int | num_compute_queues () |
void | allocateBatchArrays (int64_t batch_size, int64_t num_arrays) |
Allocates batch arrays and BLAS++ queues for all devices. | |
void | clearBatchArrays () |
Frees device batch arrays that were allocated by allocateBatchArrays(). | |
int64_t | batchArraySize () const |
scalar_t ** | batchArrayHost (int device, int64_t batch_arrays_index) |
scalar_t ** | batchArrayDevice (int device, int64_t batch_arrays_index) |
void | reserveHostWorkspace (int64_t num_tiles) |
Reserves num_tiles on host in allocator. | |
void | reserveDeviceWorkspace (int64_t num_tiles) |
Reserves num_tiles on each device in allocator. | |
void | ensureDeviceWorkspace (int device, int64_t num_tiles) |
Ensures there is unoccupied workspace for num_tiles on device in allocator. | |
void | clearWorkspace () |
Clears all host and device workspace tiles. | |
void | releaseWorkspace () |
Clears all host and device workspace tiles that are not OnHold. | |
scalar_t * | allocWorkspaceBuffer (int device, int size) |
Allocates a memory block on device to be used as a workspace buffer, to be released with a call to releaseWorkspaceBuffer(). | |
void | releaseWorkspaceBuffer (scalar_t *data, int device) |
Release the memory block indicated by data on device to the memory manager. | |
TileNode_t & | at (ij_tuple ij) |
Tile< scalar_t > * | at (ijdev_tuple ijdev) |
void | erase (ijdev_tuple ijdev) |
Remove a tile instance from device and delete it unconditionally. | |
void | erase (ij_tuple ij) |
Remove a tile with all instances on all devices from map and delete it unconditionally. | |
void | release (ijdev_tuple ijdev) |
Remove a tile instance on device and delete it if it is a workspace and not OnHold. | |
void | freeTileMemory (Tile< scalar_t > *tile) |
Returns the tile's allocated memory and extended memory to the memory factory. | |
void | clear () |
Delete all tiles. | |
omp_nest_lock_t * | getTilesMapLock () |
Return pointer to tiles-map OMP lock. | |
bool | tileIsLocal (ij_tuple ij) |
Tile< scalar_t > * | tileInsert (ijdev_tuple ijdev, TileKind, Layout layout=Layout::ColMajor) |
Inserts tile {i, j} on given device, which can be host, allocating new memory for it. | |
Tile< scalar_t > * | tileInsert (ijdev_tuple ijdev, scalar_t *data, int64_t lda, Layout layout=Layout::ColMajor) |
This is intended for inserting the original matrix. | |
bool | tileExists (ijdev_tuple ijdev) |
void | tileMakeTransposable (Tile< scalar_t > *tile) |
Makes tile layout convertible by extending its data buffer. | |
void | tileLayoutReset (Tile< scalar_t > *tile) |
Resets the extended tile. | |
int64_t | tileReceiveCount (ij_tuple ij) |
void | tileIncrementReceiveCount (ij_tuple ij) |
Increment tile's receive counter. | |
void | tileDecrementReceiveCount (ij_tuple ij, int64_t release_count=1) |
Decrement tile's receive counter. | |
void | tilePrepareToReceive (ij_tuple ij, int device, Layout layout) |
Ensures the tile node exists and increments the receive count. | |
MOSI | tileState (ijdev_tuple ijdev) |
Gets the state of the given tile. | |
MOSI | tileOnHold (ijdev_tuple ijdev) |
Checks whether the given tile is on hold. | |
void | tileUnsetHold (ijdev_tuple ijdev) |
Unsets any hold on the given tile. | |
Static Public Member Functions | |
static int | num_devices () |
Public Attributes | |
std::function< int64_t(int64_t i)> | tileMb |
std::function< int64_t(int64_t j)> | tileNb |
std::function< int(ij_tuple ij)> | tileRank |
std::function< int(ij_tuple ij)> | tileDevice |
Protected Member Functions | |
void | initQueues () |
Initializes BLAS++ compute and communication queues on each device. | |
void | destroyQueues () |
Destroys BLAS++ compute and communication queues on each device. | |
Friends | |
class | Debug |
slate::MatrixStorage class: used to store the map of distributed tiles.
scalar_t | Data type for the elements of the matrix |
void slate::MatrixStorage< scalar_t >::allocateBatchArrays | ( | int64_t | batch_size, |
int64_t | num_arrays | ||
) |
Allocates batch arrays and BLAS++ queues for all devices.
If arrays are already allocated, frees and reallocates the arrays only if batch_size is larger than the existing size.
[in] | batch_size | Allocate batch arrays as needed so that size of each batch array >= batch_size >= 0. |
[in] | num_arrays | Allocate batch arrays as needed so that number of batch arrays per device >= num_arrays >= 1. |
scalar_t * slate::MatrixStorage< scalar_t >::allocWorkspaceBuffer | ( | int | device, |
int | size | ||
) |
Allocates a memory block on device to be used as a workspace buffer, to be released with a call to releaseWorkspaceBuffer().
[in] | device | Device ID (GPU or Host) where the memory block is needed. |
[in] | size | Number of scalars needed in the memory block |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
[in] | device | Tile's device ID. |
|
inline |
[in] | device | Tile's device ID |
[in] | queue_index | The index of a specific set of queues |
|
protected |
Destroys BLAS++ compute and communication queues on each device.
As this is called in the destructor, it should NOT throw exceptions.
void slate::MatrixStorage< scalar_t >::erase | ( | ij_tuple | ij | ) |
Remove a tile with all instances on all devices from map and delete it unconditionally.
If tile's memory was allocated by SLATE, then its memory is freed back to the allocator memory pool.
void slate::MatrixStorage< scalar_t >::erase | ( | ijdev_tuple | ijdev | ) |
Remove a tile instance from device and delete it unconditionally.
If tile node becomes empty, deletes it. If tile's memory was allocated by SLATE, then its memory is freed back to the allocator memory pool.
|
protected |
Initializes BLAS++ compute and communication queues on each device.
Also initializes the host and device batch arrays. Called in constructor.
|
inline |
void slate::MatrixStorage< scalar_t >::release | ( | ijdev_tuple | ijdev | ) |
Remove a tile instance on device and delete it if it is a workspace and not OnHold.
If tile node becomes empty, deletes it. If tile's memory was allocated by SLATE, then its memory is freed back to the allocator memory pool. For local tiles, it ensures that a valid copy remains. device can be AllDevices.
Note that local tiles are currently not released if doing so would leave all remaining tiles invalid, but this behavior may change in the future and should not be relied on.
void slate::MatrixStorage< scalar_t >::releaseWorkspace |
Clears all host and device workspace tiles that are not OnHold.
For local tiles, it ensures that a valid copy remains.
Note that local tiles are currently not released if doing so would leave all remaining tiles invalid, but this behavior may change in the future and should not be relied on.
void slate::MatrixStorage< scalar_t >::releaseWorkspaceBuffer | ( | scalar_t * | data, |
int | device | ||
) |
Release the memory block indicated by data on device to the memory manager.
[in] | data | Pointer to memory block to be released. |
[in] | device | Device ID (GPU or Host) where the memory block is. |
Tile< scalar_t > * slate::MatrixStorage< scalar_t >::tileInsert | ( | ijdev_tuple | ijdev, |
scalar_t * | data, | ||
int64_t | lda, | ||
Layout | layout = Layout::ColMajor |
||
) |
This is intended for inserting the original matrix.
Inserts tile {i, j} on given device, which can be host, wrapping existing memory for it. Sets tile kind = TileKind::UserOwned. This will be the origin tile, thus TileNode(i, j) should not pre-exist.
Tile< scalar_t > * slate::MatrixStorage< scalar_t >::tileInsert | ( | ijdev_tuple | ijdev, |
TileKind | kind, | ||
Layout | layout = Layout::ColMajor |
||
) |
Inserts tile {i, j} on given device, which can be host, allocating new memory for it.
Creates TileNode(i, j) if it does not already exist. Tile kind should be either TileKind::Workspace or TileKind::SlateOwned.
|
inline |
void slate::MatrixStorage< scalar_t >::tileLayoutReset | ( | Tile< scalar_t > * | tile | ) |
Resets the extended tile.
Frees the extended buffer, returning it to the memory manager, then resets the tile's extended member fields.
[in,out] | tile | Pointer to extended tile. |
void slate::MatrixStorage< scalar_t >::tileMakeTransposable | ( | Tile< scalar_t > * | tile | ) |
Makes tile layout convertible by extending its data buffer.
Attaches an auxiliary buffer to hold the transposed data when needed.
[in,out] | tile | Pointer to tile to extend its data buffer. |
|
inline |
Ensures the tile node exists and increments the receive count.
If the tile node doesn't exist (i.e., no tile on any device), this inserts a new tile on the given device, which may be host. This does not currently ensure that a tile exists on the given device.
|
inline |