BLAS++ 2024.05.31
BLAS C++ API
Loading...
Searching...
No Matches
device_blas.hh
1// Copyright (c) 2017-2023, University of Tennessee. All rights reserved.
2// SPDX-License-Identifier: BSD-3-Clause
3// This program is free software: you can redistribute it and/or modify it under
4// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
5
6#include "blas/device.hh"
7
8#include <vector>
9
10namespace blas {
11
12//==============================================================================
13// Level 1 BLAS
14// Alphabetical order
15
16//------------------------------------------------------------------------------
// axpy overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>).
//   n          - int64_t element count argument.
//   alpha      - scalar, same type as the vector elements.
//   x, incx    - read-only vector (const pointer) and its increment.
//   y, incy    - writable vector (non-const pointer) and its increment.
//   queue      - blas::Queue the operation is issued on.
// NOTE(review): by reference-BLAS naming this is presumably y = alpha*x + y
// with incx/incy as element strides -- confirm against the implementation.
// Whether the call returns before the work on `queue` has finished is not
// visible from this header.
17void axpy(
18 int64_t n,
19 float alpha,
20 float const* x, int64_t incx,
21 float* y, int64_t incy,
22 blas::Queue& queue );
23
24void axpy(
25 int64_t n,
26 double alpha,
27 double const* x, int64_t incx,
28 double* y, int64_t incy,
29 blas::Queue& queue );
30
31void axpy(
32 int64_t n,
33 std::complex<float> alpha,
34 std::complex<float> const* x, int64_t incx,
35 std::complex<float>* y, int64_t incy,
36 blas::Queue& queue );
37
38void axpy(
39 int64_t n,
40 std::complex<double> alpha,
41 std::complex<double> const* x, int64_t incx,
42 std::complex<double>* y, int64_t incy,
43 blas::Queue& queue );
44
45//------------------------------------------------------------------------------
// copy overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>).
//   n          - int64_t element count argument.
//   x, incx    - read-only source vector (const pointer) and its increment.
//   y, incy    - writable destination vector and its increment.
//   queue      - blas::Queue the operation is issued on.
// NOTE(review): presumably copies x into y as in reference BLAS -- confirm.
46void copy(
47 int64_t n,
48 float const* x, int64_t incx,
49 float* y, int64_t incy,
50 blas::Queue& queue );
51
52void copy(
53 int64_t n,
54 double const* x, int64_t incx,
55 double* y, int64_t incy,
56 blas::Queue& queue );
57
58void copy(
59 int64_t n,
60 std::complex<float> const* x, int64_t incx,
61 std::complex<float>* y, int64_t incy,
62 blas::Queue& queue );
63
64void copy(
65 int64_t n,
66 std::complex<double> const* x, int64_t incx,
67 std::complex<double>* y, int64_t incy,
68 blas::Queue& queue );
69
70//------------------------------------------------------------------------------
// dot overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>).
//   n          - int64_t element count argument.
//   x, incx    - read-only vector and its increment.
//   y, incy    - read-only vector and its increment.
//   result     - pointer through which the value is returned (no return
//                value; the function is void). Same scalar type as x and y.
//   queue      - blas::Queue the operation is issued on.
// NOTE(review): whether `result` must point to host or device memory is not
// visible here. For complex types this is presumably the conjugated dot
// product (dotu being the unconjugated one) -- confirm.
71void dot(
72 int64_t n,
73 float const* x, int64_t incx,
74 float const* y, int64_t incy,
75 float* result,
76 blas::Queue& queue );
77
78void dot(
79 int64_t n,
80 double const* x, int64_t incx,
81 double const* y, int64_t incy,
82 double* result,
83 blas::Queue& queue );
84
85void dot(
86 int64_t n,
87 std::complex<float> const* x, int64_t incx,
88 std::complex<float> const* y, int64_t incy,
89 std::complex<float>* result,
90 blas::Queue& queue );
91
92void dot(
93 int64_t n,
94 std::complex<double> const* x, int64_t incx,
95 std::complex<double> const* y, int64_t incy,
96 std::complex<double>* result,
97 blas::Queue& queue );
98
99//------------------------------------------------------------------------------
// dotu overloads taking a blas::Queue; the parameter lists are identical to
// the dot overloads (read-only x and y, value returned through `result`).
// Declared for float, double, std::complex<float>, std::complex<double>.
// NOTE(review): presumably the unconjugated dot product, so the real
// overloads would match dot -- confirm against the implementation.
100void dotu(
101 int64_t n,
102 float const* x, int64_t incx,
103 float const* y, int64_t incy,
104 float* result,
105 blas::Queue& queue );
106
107void dotu(
108 int64_t n,
109 double const* x, int64_t incx,
110 double const* y, int64_t incy,
111 double* result,
112 blas::Queue& queue );
113
114void dotu(
115 int64_t n,
116 std::complex<float> const* x, int64_t incx,
117 std::complex<float> const* y, int64_t incy,
118 std::complex<float>* result,
119 blas::Queue& queue );
120
121void dotu(
122 int64_t n,
123 std::complex<double> const* x, int64_t incx,
124 std::complex<double> const* y, int64_t incy,
125 std::complex<double>* result,
126 blas::Queue& queue );
127
128//------------------------------------------------------------------------------
// nrm2 overloads taking a blas::Queue: one per scalar type of x (float,
// double, std::complex<float>, std::complex<double>).
//   n          - int64_t element count argument.
//   x, incx    - read-only vector and its increment.
//   result     - pointer through which the value is returned. Note the type
//                is real (float / double) even when x is complex.
//   queue      - blas::Queue the operation is issued on.
// NOTE(review): presumably the Euclidean norm of x as in reference BLAS;
// host vs. device location of `result` is not visible here -- confirm.
129void nrm2(
130 int64_t n,
131 float const* x, int64_t incx,
132 float* result,
133 blas::Queue& queue );
134
135void nrm2(
136 int64_t n,
137 double const* x, int64_t incx,
138 double* result,
139 blas::Queue& queue );
140
141void nrm2(
142 int64_t n,
143 std::complex<float> const* x, int64_t incx,
144 float* result,
145 blas::Queue& queue );
146
147void nrm2(
148 int64_t n,
149 std::complex<double> const* x, int64_t incx,
150 double* result,
151 blas::Queue& queue );
152
153//------------------------------------------------------------------------------
// scal overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>).
//   n          - int64_t element count argument.
//   alpha      - scalar, same type as the elements of x.
//   x, incx    - writable vector (non-const pointer) and its increment.
//   queue      - blas::Queue the operation is issued on.
// NOTE(review): presumably scales x in place by alpha -- confirm.
154void scal(
155 int64_t n,
156 float alpha,
157 float* x, int64_t incx,
158 blas::Queue& queue );
159
160void scal(
161 int64_t n,
162 double alpha,
163 double* x, int64_t incx,
164 blas::Queue& queue );
165
166void scal(
167 int64_t n,
168 std::complex<float> alpha,
169 std::complex<float>* x, int64_t incx,
170 blas::Queue& queue );
171
172void scal(
173 int64_t n,
174 std::complex<double> alpha,
175 std::complex<double>* x, int64_t incx,
176 blas::Queue& queue );
177
178//------------------------------------------------------------------------------
// swap overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>).
//   n          - int64_t element count argument.
//   x, incx    - writable vector and its increment.
//   y, incy    - writable vector and its increment.
//   queue      - blas::Queue the operation is issued on.
// Both x and y are non-const pointers.
// NOTE(review): presumably exchanges the contents of x and y -- confirm.
179void swap(
180 int64_t n,
181 float* x, int64_t incx,
182 float* y, int64_t incy,
183 blas::Queue& queue );
184
185void swap(
186 int64_t n,
187 double* x, int64_t incx,
188 double* y, int64_t incy,
189 blas::Queue& queue );
190
191void swap(
192 int64_t n,
193 std::complex<float>* x, int64_t incx,
194 std::complex<float>* y, int64_t incy,
195 blas::Queue& queue );
196
197void swap(
198 int64_t n,
199 std::complex<double>* x, int64_t incx,
200 std::complex<double>* y, int64_t incy,
201 blas::Queue& queue );
202
203//==============================================================================
204// Level 2 BLAS
205
206//==============================================================================
207// Level 3 BLAS
208
209//------------------------------------------------------------------------------
// gemm overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>).
//   layout          - blas::Layout selector.
//   transA, transB  - blas::Op selectors for A and B.
//   m, n, k         - int64_t dimension arguments.
//   alpha, beta     - scalars, same type as the matrix elements.
//   A, lda / B, ldb - read-only matrices (const pointers) and leading dims.
//   C, ldc          - writable matrix and its leading dimension.
//   queue           - blas::Queue the operation is issued on.
// NOTE(review): presumably C = alpha*op(A)*op(B) + beta*C as in reference
// BLAS -- confirm against the implementation.
210void gemm(
211 blas::Layout layout,
212 blas::Op transA,
213 blas::Op transB,
214 int64_t m, int64_t n, int64_t k,
215 float alpha,
216 float const* A, int64_t lda,
217 float const* B, int64_t ldb,
218 float beta,
219 float* C, int64_t ldc,
220 blas::Queue& queue );
221
222void gemm(
223 blas::Layout layout,
224 blas::Op transA,
225 blas::Op transB,
226 int64_t m, int64_t n, int64_t k,
227 double alpha,
228 double const* A, int64_t lda,
229 double const* B, int64_t ldb,
230 double beta,
231 double* C, int64_t ldc,
232 blas::Queue& queue );
233
234void gemm(
235 blas::Layout layout,
236 blas::Op transA,
237 blas::Op transB,
238 int64_t m, int64_t n, int64_t k,
239 std::complex<float> alpha,
240 std::complex<float> const* A, int64_t lda,
241 std::complex<float> const* B, int64_t ldb,
242 std::complex<float> beta,
243 std::complex<float>* C, int64_t ldc,
244 blas::Queue& queue );
245
246void gemm(
247 blas::Layout layout,
248 blas::Op transA,
249 blas::Op transB,
250 int64_t m, int64_t n, int64_t k,
251 std::complex<double> alpha,
252 std::complex<double> const* A, int64_t lda,
253 std::complex<double> const* B, int64_t ldb,
254 std::complex<double> beta,
255 std::complex<double>* C, int64_t ldc,
256 blas::Queue& queue );
257
258//------------------------------------------------------------------------------
// hemm overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>).
//   layout          - blas::Layout selector.
//   side, uplo      - blas::Side and blas::Uplo selectors.
//   m, n            - int64_t dimension arguments.
//   alpha, beta     - scalars, same type as the matrix elements.
//   A, lda / B, ldb - read-only matrices (const pointers) and leading dims.
//   C, ldc          - writable matrix and its leading dimension.
//   queue           - blas::Queue the operation is issued on.
// NOTE(review): the real (float/double) overloads presumably behave like
// symm, since Hermitian and symmetric coincide for real data -- confirm.
259void hemm(
260 blas::Layout layout,
261 blas::Side side,
262 blas::Uplo uplo,
263 int64_t m, int64_t n,
264 float alpha,
265 float const* A, int64_t lda,
266 float const* B, int64_t ldb,
267 float beta,
268 float* C, int64_t ldc,
269 blas::Queue& queue );
270
271void hemm(
272 blas::Layout layout,
273 blas::Side side,
274 blas::Uplo uplo,
275 int64_t m, int64_t n,
276 double alpha,
277 double const* A, int64_t lda,
278 double const* B, int64_t ldb,
279 double beta,
280 double* C, int64_t ldc,
281 blas::Queue& queue );
282
283void hemm(
284 blas::Layout layout,
285 blas::Side side,
286 blas::Uplo uplo,
287 int64_t m, int64_t n,
288 std::complex<float> alpha,
289 std::complex<float> const* A, int64_t lda,
290 std::complex<float> const* B, int64_t ldb,
291 std::complex<float> beta,
292 std::complex<float>* C, int64_t ldc,
293 blas::Queue& queue );
294
295void hemm(
296 blas::Layout layout,
297 blas::Side side,
298 blas::Uplo uplo,
299 int64_t m, int64_t n,
300 std::complex<double> alpha,
301 std::complex<double> const* A, int64_t lda,
302 std::complex<double> const* B, int64_t ldb,
303 std::complex<double> beta,
304 std::complex<double>* C, int64_t ldc,
305 blas::Queue& queue );
306
307//------------------------------------------------------------------------------
// her2k overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>).
//   layout          - blas::Layout selector.
//   uplo, trans     - blas::Uplo and blas::Op selectors.
//   n, k            - int64_t dimension arguments.
//   alpha           - scalar of the matrix element type (complex in the
//                     complex overloads).
//   A, lda / B, ldb - read-only matrices (const pointers) and leading dims.
//   beta            - REAL scalar in every overload, including the complex
//                     ones (float for complex<float>, double for
//                     complex<double>).
//   C, ldc          - writable matrix and its leading dimension.
//   queue           - blas::Queue the operation is issued on.
// Mixed alpha/beta types mean a complex beta will not match these overloads.
308void her2k(
309 blas::Layout layout,
310 blas::Uplo uplo,
311 blas::Op trans,
312 int64_t n, int64_t k,
313 float alpha,
314 float const* A, int64_t lda,
315 float const* B, int64_t ldb,
316 float beta,
317 float* C, int64_t ldc,
318 blas::Queue& queue );
319
320void her2k(
321 blas::Layout layout,
322 blas::Uplo uplo,
323 blas::Op trans,
324 int64_t n, int64_t k,
325 double alpha,
326 double const* A, int64_t lda,
327 double const* B, int64_t ldb,
328 double beta,
329 double* C, int64_t ldc,
330 blas::Queue& queue );
331
332void her2k(
333 blas::Layout layout,
334 blas::Uplo uplo,
335 blas::Op trans,
336 int64_t n, int64_t k,
337 std::complex<float> alpha, // note: complex
338 std::complex<float> const* A, int64_t lda,
339 std::complex<float> const* B, int64_t ldb,
340 float beta, // note: real
341 std::complex<float>* C, int64_t ldc,
342 blas::Queue& queue );
343
344void her2k(
345 blas::Layout layout,
346 blas::Uplo uplo,
347 blas::Op trans,
348 int64_t n, int64_t k,
349 std::complex<double> alpha, // note: complex
350 std::complex<double> const* A, int64_t lda,
351 std::complex<double> const* B, int64_t ldb,
352 double beta, // note: real
353 std::complex<double>* C, int64_t ldc,
354 blas::Queue& queue );
355
356//------------------------------------------------------------------------------
// herk overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>).
//   layout       - blas::Layout selector.
//   uplo, trans  - blas::Uplo and blas::Op selectors.
//   n, k         - int64_t dimension arguments.
//   alpha, beta  - REAL scalars in every overload, including the complex
//                  ones (float for complex<float>, double for
//                  complex<double>).
//   A, lda       - read-only matrix (const pointer) and leading dimension.
//   C, ldc       - writable matrix and its leading dimension.
//   queue        - blas::Queue the operation is issued on.
357void herk(
358 blas::Layout layout,
359 blas::Uplo uplo,
360 blas::Op trans,
361 int64_t n, int64_t k,
362 float alpha,
363 float const* A, int64_t lda,
364 float beta,
365 float* C, int64_t ldc,
366 blas::Queue& queue );
367
368void herk(
369 blas::Layout layout,
370 blas::Uplo uplo,
371 blas::Op trans,
372 int64_t n, int64_t k,
373 double alpha,
374 double const* A, int64_t lda,
375 double beta,
376 double* C, int64_t ldc,
377 blas::Queue& queue );
378
379void herk(
380 blas::Layout layout,
381 blas::Uplo uplo,
382 blas::Op trans,
383 int64_t n, int64_t k,
384 float alpha, // note: real
385 std::complex<float> const* A, int64_t lda,
386 float beta, // note: real
387 std::complex<float>* C, int64_t ldc,
388 blas::Queue& queue );
389
390void herk(
391 blas::Layout layout,
392 blas::Uplo uplo,
393 blas::Op trans,
394 int64_t n, int64_t k,
395 double alpha, // note: real
396 std::complex<double> const* A, int64_t lda,
397 double beta, // note: real
398 std::complex<double>* C, int64_t ldc,
399 blas::Queue& queue );
400
401//------------------------------------------------------------------------------
// symm overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>).
//   layout          - blas::Layout selector.
//   side, uplo      - blas::Side and blas::Uplo selectors.
//   m, n            - int64_t dimension arguments.
//   alpha, beta     - scalars, same type as the matrix elements.
//   A, lda / B, ldb - read-only matrices (const pointers) and leading dims.
//   C, ldc          - writable matrix and its leading dimension.
//   queue           - blas::Queue the operation is issued on.
// The parameter list matches hemm exactly.
402void symm(
403 blas::Layout layout,
404 blas::Side side,
405 blas::Uplo uplo,
406 int64_t m, int64_t n,
407 float alpha,
408 float const* A, int64_t lda,
409 float const* B, int64_t ldb,
410 float beta,
411 float* C, int64_t ldc,
412 blas::Queue& queue );
413
414void symm(
415 blas::Layout layout,
416 blas::Side side,
417 blas::Uplo uplo,
418 int64_t m, int64_t n,
419 double alpha,
420 double const* A, int64_t lda,
421 double const* B, int64_t ldb,
422 double beta,
423 double* C, int64_t ldc,
424 blas::Queue& queue );
425
426void symm(
427 blas::Layout layout,
428 blas::Side side,
429 blas::Uplo uplo,
430 int64_t m, int64_t n,
431 std::complex<float> alpha,
432 std::complex<float> const* A, int64_t lda,
433 std::complex<float> const* B, int64_t ldb,
434 std::complex<float> beta,
435 std::complex<float>* C, int64_t ldc,
436 blas::Queue& queue );
437
438void symm(
439 blas::Layout layout,
440 blas::Side side,
441 blas::Uplo uplo,
442 int64_t m, int64_t n,
443 std::complex<double> alpha,
444 std::complex<double> const* A, int64_t lda,
445 std::complex<double> const* B, int64_t ldb,
446 std::complex<double> beta,
447 std::complex<double>* C, int64_t ldc,
448 blas::Queue& queue );
449
450//------------------------------------------------------------------------------
// syr2k overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>).
//   layout          - blas::Layout selector.
//   uplo, trans     - blas::Uplo and blas::Op selectors.
//   n, k            - int64_t dimension arguments.
//   alpha, beta     - scalars, same type as the matrix elements (unlike
//                     her2k, beta is complex in the complex overloads).
//   A, lda / B, ldb - read-only matrices (const pointers) and leading dims.
//   C, ldc          - writable matrix and its leading dimension.
//   queue           - blas::Queue the operation is issued on.
451void syr2k(
452 blas::Layout layout,
453 blas::Uplo uplo,
454 blas::Op trans,
455 int64_t n, int64_t k,
456 float alpha,
457 float const* A, int64_t lda,
458 float const* B, int64_t ldb,
459 float beta,
460 float* C, int64_t ldc,
461 blas::Queue& queue );
462
463void syr2k(
464 blas::Layout layout,
465 blas::Uplo uplo,
466 blas::Op trans,
467 int64_t n, int64_t k,
468 double alpha,
469 double const* A, int64_t lda,
470 double const* B, int64_t ldb,
471 double beta,
472 double* C, int64_t ldc,
473 blas::Queue& queue );
474
475void syr2k(
476 blas::Layout layout,
477 blas::Uplo uplo,
478 blas::Op trans,
479 int64_t n, int64_t k,
480 std::complex<float> alpha,
481 std::complex<float> const* A, int64_t lda,
482 std::complex<float> const* B, int64_t ldb,
483 std::complex<float> beta,
484 std::complex<float>* C, int64_t ldc,
485 blas::Queue& queue );
486
487void syr2k(
488 blas::Layout layout,
489 blas::Uplo uplo,
490 blas::Op trans,
491 int64_t n, int64_t k,
492 std::complex<double> alpha,
493 std::complex<double> const* A, int64_t lda,
494 std::complex<double> const* B, int64_t ldb,
495 std::complex<double> beta,
496 std::complex<double>* C, int64_t ldc,
497 blas::Queue& queue );
498
499//------------------------------------------------------------------------------
// syrk overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>).
//   layout       - blas::Layout selector.
//   uplo, trans  - blas::Uplo and blas::Op selectors.
//   n, k         - int64_t dimension arguments.
//   alpha, beta  - scalars, same type as the matrix elements (unlike herk,
//                  both are complex in the complex overloads).
//   A, lda       - read-only matrix (const pointer) and leading dimension.
//   C, ldc       - writable matrix and its leading dimension.
//   queue        - blas::Queue the operation is issued on.
500void syrk(
501 blas::Layout layout,
502 blas::Uplo uplo,
503 blas::Op trans,
504 int64_t n, int64_t k,
505 float alpha,
506 float const* A, int64_t lda,
507 float beta,
508 float* C, int64_t ldc,
509 blas::Queue& queue );
510
511void syrk(
512 blas::Layout layout,
513 blas::Uplo uplo,
514 blas::Op trans,
515 int64_t n, int64_t k,
516 double alpha,
517 double const* A, int64_t lda,
518 double beta,
519 double* C, int64_t ldc,
520 blas::Queue& queue );
521
522void syrk(
523 blas::Layout layout,
524 blas::Uplo uplo,
525 blas::Op trans,
526 int64_t n, int64_t k,
527 std::complex<float> alpha,
528 std::complex<float> const* A, int64_t lda,
529 std::complex<float> beta,
530 std::complex<float>* C, int64_t ldc,
531 blas::Queue& queue );
532
533void syrk(
534 blas::Layout layout,
535 blas::Uplo uplo,
536 blas::Op trans,
537 int64_t n, int64_t k,
538 std::complex<double> alpha,
539 std::complex<double> const* A, int64_t lda,
540 std::complex<double> beta,
541 std::complex<double>* C, int64_t ldc,
542 blas::Queue& queue );
543
544//------------------------------------------------------------------------------
// trmm overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>).
//   layout             - blas::Layout selector.
//   side, uplo         - blas::Side and blas::Uplo selectors.
//   trans, diag        - blas::Op and blas::Diag selectors.
//   m, n               - int64_t dimension arguments.
//   alpha              - scalar, same type as the matrix elements.
//   A, lda             - read-only matrix (const pointer) and leading dim.
//   B, ldb             - writable matrix (non-const pointer) and leading dim.
//   queue              - blas::Queue the operation is issued on.
// There is no separate output matrix; B is the only writable operand.
// NOTE(review): presumably B is overwritten with the triangular product as
// in reference BLAS -- confirm.
545void trmm(
546 blas::Layout layout,
547 blas::Side side,
548 blas::Uplo uplo,
549 blas::Op trans,
550 blas::Diag diag,
551 int64_t m,
552 int64_t n,
553 float alpha,
554 float const* A, int64_t lda,
555 float* B, int64_t ldb,
556 blas::Queue& queue );
557
558void trmm(
559 blas::Layout layout,
560 blas::Side side,
561 blas::Uplo uplo,
562 blas::Op trans,
563 blas::Diag diag,
564 int64_t m,
565 int64_t n,
566 double alpha,
567 double const* A, int64_t lda,
568 double* B, int64_t ldb,
569 blas::Queue& queue );
570
571void trmm(
572 blas::Layout layout,
573 blas::Side side,
574 blas::Uplo uplo,
575 blas::Op trans,
576 blas::Diag diag,
577 int64_t m,
578 int64_t n,
579 std::complex<float> alpha,
580 std::complex<float> const* A, int64_t lda,
581 std::complex<float>* B, int64_t ldb,
582 blas::Queue& queue );
583
584void trmm(
585 blas::Layout layout,
586 blas::Side side,
587 blas::Uplo uplo,
588 blas::Op trans,
589 blas::Diag diag,
590 int64_t m,
591 int64_t n,
592 std::complex<double> alpha,
593 std::complex<double> const* A, int64_t lda,
594 std::complex<double>* B, int64_t ldb,
595 blas::Queue& queue );
596
597//------------------------------------------------------------------------------
// trsm overloads taking a blas::Queue: one per scalar type (float, double,
// std::complex<float>, std::complex<double>). The parameter list matches
// trmm exactly.
//   layout             - blas::Layout selector.
//   side, uplo         - blas::Side and blas::Uplo selectors.
//   trans, diag        - blas::Op and blas::Diag selectors.
//   m, n               - int64_t dimension arguments.
//   alpha              - scalar, same type as the matrix elements.
//   A, lda             - read-only matrix (const pointer) and leading dim.
//   B, ldb             - writable matrix (non-const pointer) and leading dim.
//   queue              - blas::Queue the operation is issued on.
// NOTE(review): presumably B holds the right-hand sides on entry and the
// triangular-solve solution on exit, as in reference BLAS -- confirm.
598void trsm(
599 blas::Layout layout,
600 blas::Side side,
601 blas::Uplo uplo,
602 blas::Op trans,
603 blas::Diag diag,
604 int64_t m,
605 int64_t n,
606 float alpha,
607 float const* A, int64_t lda,
608 float* B, int64_t ldb,
609 blas::Queue& queue );
610
611void trsm(
612 blas::Layout layout,
613 blas::Side side,
614 blas::Uplo uplo,
615 blas::Op trans,
616 blas::Diag diag,
617 int64_t m,
618 int64_t n,
619 double alpha,
620 double const* A, int64_t lda,
621 double* B, int64_t ldb,
622 blas::Queue& queue );
623
624void trsm(
625 blas::Layout layout,
626 blas::Side side,
627 blas::Uplo uplo,
628 blas::Op trans,
629 blas::Diag diag,
630 int64_t m,
631 int64_t n,
632 std::complex<float> alpha,
633 std::complex<float> const* A, int64_t lda,
634 std::complex<float>* B, int64_t ldb,
635 blas::Queue& queue );
636
637void trsm(
638 blas::Layout layout,
639 blas::Side side,
640 blas::Uplo uplo,
641 blas::Op trans,
642 blas::Diag diag,
643 int64_t m,
644 int64_t n,
645 std::complex<double> alpha,
646 std::complex<double> const* A, int64_t lda,
647 std::complex<double>* B, int64_t ldb,
648 blas::Queue& queue );
649
650//==============================================================================
651// Batch BLAS APIs (device)
652//==============================================================================
653namespace batch {
654
655//==============================================================================
656// Level 1 Batch BLAS
657
658//==============================================================================
659// Level 2 Batch BLAS
660
661//==============================================================================
662// Level 3 Batch BLAS
663
664//------------------------------------------------------------------------------
665// batch gemm
// Overloads for float, double, std::complex<float>, std::complex<double>.
// Every per-problem argument of the non-batch gemm is passed as a
// std::vector (transA, transB, m, n, k, alpha, lda, ldb, beta, ldc), and
// the matrices as vectors of pointers (Aarray, Barray, Carray); `layout`
// stays a single value.
//   batch_size - size_t count argument.
//   info       - the only non-const vector in the signature.
//   queue      - blas::Queue the operation is issued on.
// Aarray and Barray hold non-const element pointers here, although the
// non-batch gemm takes A and B as pointers to const.
// NOTE(review): presumably `info` receives per-problem status codes, and
// vectors of length 1 may be allowed to apply to the whole batch -- neither
// is visible from this header; confirm against the implementation.
666void gemm(
667 blas::Layout layout,
668 std::vector<blas::Op> const& transA,
669 std::vector<blas::Op> const& transB,
670 std::vector<int64_t> const& m,
671 std::vector<int64_t> const& n,
672 std::vector<int64_t> const& k,
673 std::vector<float > const& alpha,
674 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
675 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
676 std::vector<float > const& beta,
677 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
678 size_t batch_size,
679 std::vector<int64_t>& info,
680 blas::Queue& queue );
681
682void gemm(
683 blas::Layout layout,
684 std::vector<blas::Op> const& transA,
685 std::vector<blas::Op> const& transB,
686 std::vector<int64_t> const& m,
687 std::vector<int64_t> const& n,
688 std::vector<int64_t> const& k,
689 std::vector<double > const& alpha,
690 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
691 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
692 std::vector<double > const& beta,
693 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
694 size_t batch_size,
695 std::vector<int64_t>& info,
696 blas::Queue& queue );
697
698void gemm(
699 blas::Layout layout,
700 std::vector<blas::Op> const& transA,
701 std::vector<blas::Op> const& transB,
702 std::vector<int64_t> const& m,
703 std::vector<int64_t> const& n,
704 std::vector<int64_t> const& k,
705 std::vector< std::complex<float> > const& alpha,
706 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
707 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
708 std::vector< std::complex<float> > const& beta,
709 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
710 size_t batch_size,
711 std::vector<int64_t>& info,
712 blas::Queue& queue );
713
714void gemm(
715 blas::Layout layout,
716 std::vector<blas::Op> const& transA,
717 std::vector<blas::Op> const& transB,
718 std::vector<int64_t> const& m,
719 std::vector<int64_t> const& n,
720 std::vector<int64_t> const& k,
721 std::vector< std::complex<double> > const& alpha,
722 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
723 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
724 std::vector< std::complex<double> > const& beta,
725 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
726 size_t batch_size,
727 std::vector<int64_t>& info,
728 blas::Queue& queue );
729
730//------------------------------------------------------------------------------
731// batch gemm, group API
// Overloads for float, double, std::complex<float>, std::complex<double>.
// The signature is the batch gemm signature with one change: the single
// `size_t batch_size` is replaced by `std::vector<size_t> const& group_size`.
// Overload resolution between the two APIs therefore depends only on that
// argument's type.
// NOTE(review): presumably each entry of group_size is the number of
// problems in a group sharing one set of parameters (so the parameter
// vectors would have one entry per group) -- not visible from this header;
// confirm against the implementation.
732void gemm(
733 blas::Layout layout,
734 std::vector<blas::Op> const& transA,
735 std::vector<blas::Op> const& transB,
736 std::vector<int64_t> const& m,
737 std::vector<int64_t> const& n,
738 std::vector<int64_t> const& k,
739 std::vector<float > const& alpha,
740 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
741 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
742 std::vector<float > const& beta,
743 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
744 std::vector<size_t> const& group_size,
745 std::vector<int64_t>& info,
746 blas::Queue& queue );
747
748void gemm(
749 blas::Layout layout,
750 std::vector<blas::Op> const& transA,
751 std::vector<blas::Op> const& transB,
752 std::vector<int64_t> const& m,
753 std::vector<int64_t> const& n,
754 std::vector<int64_t> const& k,
755 std::vector<double > const& alpha,
756 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
757 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
758 std::vector<double > const& beta,
759 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
760 std::vector<size_t> const& group_size,
761 std::vector<int64_t>& info,
762 blas::Queue& queue );
763
764void gemm(
765 blas::Layout layout,
766 std::vector<blas::Op> const& transA,
767 std::vector<blas::Op> const& transB,
768 std::vector<int64_t> const& m,
769 std::vector<int64_t> const& n,
770 std::vector<int64_t> const& k,
771 std::vector< std::complex<float> > const& alpha,
772 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
773 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
774 std::vector< std::complex<float> > const& beta,
775 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
776 std::vector<size_t> const& group_size,
777 std::vector<int64_t>& info,
778 blas::Queue& queue );
779
780void gemm(
781 blas::Layout layout,
782 std::vector<blas::Op> const& transA,
783 std::vector<blas::Op> const& transB,
784 std::vector<int64_t> const& m,
785 std::vector<int64_t> const& n,
786 std::vector<int64_t> const& k,
787 std::vector< std::complex<double> > const& alpha,
788 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
789 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
790 std::vector< std::complex<double> > const& beta,
791 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
792 std::vector<size_t> const& group_size,
793 std::vector<int64_t>& info,
794 blas::Queue& queue );
795
796//------------------------------------------------------------------------------
797// batch hemm
// Overloads for float, double, std::complex<float>, std::complex<double>.
// Per-problem arguments (side, uplo, m, n, alpha, lda, ldb, beta, ldc) are
// std::vectors and the matrices are vectors of pointers (Aarray, Barray,
// Carray); `layout` stays a single value.
//   batch_size - size_t count argument.
//   info       - the only non-const vector in the signature.
//   queue      - blas::Queue the operation is issued on.
// NOTE(review): presumably `info` receives per-problem status codes --
// not visible from this header; confirm against the implementation.
798void hemm(
799 blas::Layout layout,
800 std::vector<blas::Side> const& side,
801 std::vector<blas::Uplo> const& uplo,
802 std::vector<int64_t> const& m,
803 std::vector<int64_t> const& n,
804 std::vector<float > const& alpha,
805 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
806 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
807 std::vector<float > const& beta,
808 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
809 size_t batch_size,
810 std::vector<int64_t>& info,
811 blas::Queue& queue );
812
813void hemm(
814 blas::Layout layout,
815 std::vector<blas::Side> const& side,
816 std::vector<blas::Uplo> const& uplo,
817 std::vector<int64_t> const& m,
818 std::vector<int64_t> const& n,
819 std::vector<double > const& alpha,
820 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
821 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
822 std::vector<double > const& beta,
823 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
824 size_t batch_size,
825 std::vector<int64_t>& info,
826 blas::Queue& queue );
827
828void hemm(
829 blas::Layout layout,
830 std::vector<blas::Side> const& side,
831 std::vector<blas::Uplo> const& uplo,
832 std::vector<int64_t> const& m,
833 std::vector<int64_t> const& n,
834 std::vector< std::complex<float> > const& alpha,
835 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
836 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
837 std::vector< std::complex<float> > const& beta,
838 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
839 size_t batch_size,
840 std::vector<int64_t>& info,
841 blas::Queue& queue );
842
843void hemm(
844 blas::Layout layout,
845 std::vector<blas::Side> const& side,
846 std::vector<blas::Uplo> const& uplo,
847 std::vector<int64_t> const& m,
848 std::vector<int64_t> const& n,
849 std::vector< std::complex<double> > const& alpha,
850 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
851 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
852 std::vector< std::complex<double> > const& beta,
853 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
854 size_t batch_size,
855 std::vector<int64_t>& info,
856 blas::Queue& queue );
857
858//------------------------------------------------------------------------------
859// batch her2k
// Overloads for float, double, std::complex<float>, std::complex<double>.
// Per-problem arguments (uplo, trans, n, k, alpha, lda, ldb, beta, ldc) are
// std::vectors and the matrices are vectors of pointers (Aarray, Barray,
// Carray); `layout` stays a single value.
// As in the non-batch her2k, the complex overloads take a complex alpha
// vector but a REAL beta vector (float / double).
//   batch_size - size_t count argument.
//   info       - the only non-const vector in the signature.
//   queue      - blas::Queue the operation is issued on.
// NOTE(review): presumably `info` receives per-problem status codes --
// not visible from this header; confirm against the implementation.
860void her2k(
861 blas::Layout layout,
862 std::vector<blas::Uplo> const& uplo,
863 std::vector<blas::Op> const& trans,
864 std::vector<int64_t> const& n,
865 std::vector<int64_t> const& k,
866 std::vector<float > const& alpha,
867 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
868 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
869 std::vector<float > const& beta,
870 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
871 size_t batch_size,
872 std::vector<int64_t>& info,
873 blas::Queue& queue );
874
875void her2k(
876 blas::Layout layout,
877 std::vector<blas::Uplo> const& uplo,
878 std::vector<blas::Op> const& trans,
879 std::vector<int64_t> const& n,
880 std::vector<int64_t> const& k,
881 std::vector<double > const& alpha,
882 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
883 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
884 std::vector<double > const& beta,
885 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
886 size_t batch_size,
887 std::vector<int64_t>& info,
888 blas::Queue& queue );
889
890void her2k(
891 blas::Layout layout,
892 std::vector<blas::Uplo> const& uplo,
893 std::vector<blas::Op> const& trans,
894 std::vector<int64_t> const& n,
895 std::vector<int64_t> const& k,
896 std::vector< std::complex<float> > const& alpha,
897 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
898 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
899 std::vector< float > const& beta, // note: real
900 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
901 size_t batch_size,
902 std::vector<int64_t>& info,
903 blas::Queue& queue );
904
905void her2k(
906 blas::Layout layout,
907 std::vector<blas::Uplo> const& uplo,
908 std::vector<blas::Op> const& trans,
909 std::vector<int64_t> const& n,
910 std::vector<int64_t> const& k,
911 std::vector< std::complex<double> > const& alpha,
912 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
913 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
914 std::vector< double > const& beta, // note: real
915 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
916 size_t batch_size,
917 std::vector<int64_t>& info,
918 blas::Queue& queue );
919
920//------------------------------------------------------------------------------
921// batch herk
// Overloads for float, double, std::complex<float>, std::complex<double>.
// Per-problem arguments (uplo, trans, n, k, alpha, lda, beta, ldc) are
// std::vectors and the matrices are vectors of pointers (Aarray, Carray);
// `layout` stays a single value.
// As in the non-batch herk, alpha and beta are REAL vectors (float /
// double) in every overload, including the complex ones.
//   batch_size - size_t count argument.
//   info       - the only non-const vector in the signature.
//   queue      - blas::Queue the operation is issued on.
// NOTE(review): presumably `info` receives per-problem status codes --
// not visible from this header; confirm against the implementation.
922void herk(
923 blas::Layout layout,
924 std::vector<blas::Uplo> const& uplo,
925 std::vector<blas::Op> const& trans,
926 std::vector<int64_t> const& n,
927 std::vector<int64_t> const& k,
928 std::vector<float > const& alpha,
929 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
930 std::vector<float > const& beta,
931 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
932 size_t batch_size,
933 std::vector<int64_t>& info,
934 blas::Queue& queue );
935
936void herk(
937 blas::Layout layout,
938 std::vector<blas::Uplo> const& uplo,
939 std::vector<blas::Op> const& trans,
940 std::vector<int64_t> const& n,
941 std::vector<int64_t> const& k,
942 std::vector<double > const& alpha,
943 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
944 std::vector<double > const& beta,
945 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
946 size_t batch_size,
947 std::vector<int64_t>& info,
948 blas::Queue& queue );
949
950void herk(
951 blas::Layout layout,
952 std::vector<blas::Uplo> const& uplo,
953 std::vector<blas::Op> const& trans,
954 std::vector<int64_t> const& n,
955 std::vector<int64_t> const& k,
956 std::vector< float > const& alpha, // note: real
957 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
958 std::vector< float > const& beta, // note: real
959 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
960 size_t batch_size,
961 std::vector<int64_t>& info,
962 blas::Queue& queue );
963
964void herk(
965 blas::Layout layout,
966 std::vector<blas::Uplo> const& uplo,
967 std::vector<blas::Op> const& trans,
968 std::vector<int64_t> const& n,
969 std::vector<int64_t> const& k,
970 std::vector< double > const& alpha, // note: real
971 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
972 std::vector< double > const& beta, // note: real
973 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
974 size_t batch_size,
975 std::vector<int64_t>& info,
976 blas::Queue& queue );
977
978//------------------------------------------------------------------------------
979// batch symm
// Overloads for float, double, std::complex<float>, std::complex<double>.
// Per-problem arguments (side, uplo, m, n, alpha, lda, ldb, beta, ldc) are
// std::vectors and the matrices are vectors of pointers (Aarray, Barray,
// Carray); `layout` stays a single value. The parameter list matches the
// batch hemm overloads.
//   batch_size - size_t count argument.
//   info       - the only non-const vector in the signature.
//   queue      - blas::Queue the operation is issued on.
// NOTE(review): presumably `info` receives per-problem status codes --
// not visible from this header; confirm against the implementation.
980void symm(
981 blas::Layout layout,
982 std::vector<blas::Side> const& side,
983 std::vector<blas::Uplo> const& uplo,
984 std::vector<int64_t> const& m,
985 std::vector<int64_t> const& n,
986 std::vector<float > const& alpha,
987 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
988 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
989 std::vector<float > const& beta,
990 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
991 size_t batch_size,
992 std::vector<int64_t>& info,
993 blas::Queue& queue );
994
995void symm(
996 blas::Layout layout,
997 std::vector<blas::Side> const& side,
998 std::vector<blas::Uplo> const& uplo,
999 std::vector<int64_t> const& m,
1000 std::vector<int64_t> const& n,
1001 std::vector<double > const& alpha,
1002 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1003 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
1004 std::vector<double > const& beta,
1005 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
1006 size_t batch_size,
1007 std::vector<int64_t>& info,
1008 blas::Queue& queue );
1009
1010void symm(
1011 blas::Layout layout,
1012 std::vector<blas::Side> const& side,
1013 std::vector<blas::Uplo> const& uplo,
1014 std::vector<int64_t> const& m,
1015 std::vector<int64_t> const& n,
1016 std::vector< std::complex<float> > const& alpha,
1017 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1018 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
1019 std::vector< std::complex<float> > const& beta,
1020 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
1021 size_t batch_size,
1022 std::vector<int64_t>& info,
1023 blas::Queue& queue );
1024
1025void symm(
1026 blas::Layout layout,
1027 std::vector<blas::Side> const& side,
1028 std::vector<blas::Uplo> const& uplo,
1029 std::vector<int64_t> const& m,
1030 std::vector<int64_t> const& n,
1031 std::vector< std::complex<double> > const& alpha,
1032 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1033 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
1034 std::vector< std::complex<double> > const& beta,
1035 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
1036 size_t batch_size,
1037 std::vector<int64_t>& info,
1038 blas::Queue& queue );
1039
1040//------------------------------------------------------------------------------
1041// batch syr2k
// GPU device, variable-size batched syr2k, float version.
// layout is one value for the whole call; uplo, trans, n, k, alpha, beta and
// the lda/ldb/ldc leading dimensions are passed as vectors (presumably one
// entry per problem in the batch -- TODO confirm against the implementation).
// Aarray, Barray and Carray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1042void syr2k(
1043 blas::Layout layout,
1044 std::vector<blas::Uplo> const& uplo,
1045 std::vector<blas::Op> const& trans,
1046 std::vector<int64_t> const& n,
1047 std::vector<int64_t> const& k,
1048 std::vector<float > const& alpha,
1049 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
1050 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
1051 std::vector<float > const& beta,
1052 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
1053 size_t batch_size,
1054 std::vector<int64_t>& info,
1055 blas::Queue& queue );
1056
// GPU device, variable-size batched syr2k, double version.
// layout is one value for the whole call; uplo, trans, n, k, alpha, beta and
// the lda/ldb/ldc leading dimensions are passed as vectors (presumably one
// entry per problem in the batch -- TODO confirm against the implementation).
// Aarray, Barray and Carray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1057void syr2k(
1058 blas::Layout layout,
1059 std::vector<blas::Uplo> const& uplo,
1060 std::vector<blas::Op> const& trans,
1061 std::vector<int64_t> const& n,
1062 std::vector<int64_t> const& k,
1063 std::vector<double > const& alpha,
1064 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1065 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
1066 std::vector<double > const& beta,
1067 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
1068 size_t batch_size,
1069 std::vector<int64_t>& info,
1070 blas::Queue& queue );
1071
// GPU device, variable-size batched syr2k, std::complex<float> version.
// layout is one value for the whole call; uplo, trans, n, k, alpha, beta and
// the lda/ldb/ldc leading dimensions are passed as vectors (presumably one
// entry per problem in the batch -- TODO confirm against the implementation).
// Aarray, Barray and Carray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1072void syr2k(
1073 blas::Layout layout,
1074 std::vector<blas::Uplo> const& uplo,
1075 std::vector<blas::Op> const& trans,
1076 std::vector<int64_t> const& n,
1077 std::vector<int64_t> const& k,
1078 std::vector< std::complex<float> > const& alpha,
1079 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1080 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
1081 std::vector< std::complex<float> > const& beta,
1082 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
1083 size_t batch_size,
1084 std::vector<int64_t>& info,
1085 blas::Queue& queue );
1086
// GPU device, variable-size batched syr2k, std::complex<double> version.
// layout is one value for the whole call; uplo, trans, n, k, alpha, beta and
// the lda/ldb/ldc leading dimensions are passed as vectors (presumably one
// entry per problem in the batch -- TODO confirm against the implementation).
// Aarray, Barray and Carray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1087void syr2k(
1088 blas::Layout layout,
1089 std::vector<blas::Uplo> const& uplo,
1090 std::vector<blas::Op> const& trans,
1091 std::vector<int64_t> const& n,
1092 std::vector<int64_t> const& k,
1093 std::vector< std::complex<double> > const& alpha,
1094 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1095 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
1096 std::vector< std::complex<double> > const& beta,
1097 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
1098 size_t batch_size,
1099 std::vector<int64_t>& info,
1100 blas::Queue& queue );
1101
1102//------------------------------------------------------------------------------
1103// batch syrk
// GPU device, variable-size batched syrk, float version.
// layout is one value for the whole call; uplo, trans, n, k, alpha, beta and
// the lda/ldc leading dimensions are passed as vectors (presumably one entry
// per problem in the batch -- TODO confirm against the implementation).
// Aarray and Carray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1104void syrk(
1105 blas::Layout layout,
1106 std::vector<blas::Uplo> const& uplo,
1107 std::vector<blas::Op> const& trans,
1108 std::vector<int64_t> const& n,
1109 std::vector<int64_t> const& k,
1110 std::vector<float > const& alpha,
1111 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
1112 std::vector<float > const& beta,
1113 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
1114 size_t batch_size,
1115 std::vector<int64_t>& info,
1116 blas::Queue& queue );
1117
// GPU device, variable-size batched syrk, double version.
// layout is one value for the whole call; uplo, trans, n, k, alpha, beta and
// the lda/ldc leading dimensions are passed as vectors (presumably one entry
// per problem in the batch -- TODO confirm against the implementation).
// Aarray and Carray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1118void syrk(
1119 blas::Layout layout,
1120 std::vector<blas::Uplo> const& uplo,
1121 std::vector<blas::Op> const& trans,
1122 std::vector<int64_t> const& n,
1123 std::vector<int64_t> const& k,
1124 std::vector<double > const& alpha,
1125 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1126 std::vector<double > const& beta,
1127 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
1128 size_t batch_size,
1129 std::vector<int64_t>& info,
1130 blas::Queue& queue );
1131
// GPU device, variable-size batched syrk, std::complex<float> version.
// layout is one value for the whole call; uplo, trans, n, k, alpha, beta and
// the lda/ldc leading dimensions are passed as vectors (presumably one entry
// per problem in the batch -- TODO confirm against the implementation).
// Aarray and Carray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1132void syrk(
1133 blas::Layout layout,
1134 std::vector<blas::Uplo> const& uplo,
1135 std::vector<blas::Op> const& trans,
1136 std::vector<int64_t> const& n,
1137 std::vector<int64_t> const& k,
1138 std::vector< std::complex<float> > const& alpha,
1139 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1140 std::vector< std::complex<float> > const& beta,
1141 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
1142 size_t batch_size,
1143 std::vector<int64_t>& info,
1144 blas::Queue& queue );
1145
// GPU device, variable-size batched syrk, std::complex<double> version.
// layout is one value for the whole call; uplo, trans, n, k, alpha, beta and
// the lda/ldc leading dimensions are passed as vectors (presumably one entry
// per problem in the batch -- TODO confirm against the implementation).
// Aarray and Carray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1146void syrk(
1147 blas::Layout layout,
1148 std::vector<blas::Uplo> const& uplo,
1149 std::vector<blas::Op> const& trans,
1150 std::vector<int64_t> const& n,
1151 std::vector<int64_t> const& k,
1152 std::vector< std::complex<double> > const& alpha,
1153 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1154 std::vector< std::complex<double> > const& beta,
1155 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
1156 size_t batch_size,
1157 std::vector<int64_t>& info,
1158 blas::Queue& queue );
1159
1160//------------------------------------------------------------------------------
1161// batch trmm
// GPU device, variable-size batched trmm, float version.
// layout is one value for the whole call; side, uplo, trans, diag, m, n,
// alpha and the lda/ldb leading dimensions are passed as vectors (presumably
// one entry per problem in the batch -- TODO confirm against the
// implementation).
// Aarray and Barray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1162void trmm(
1163 blas::Layout layout,
1164 std::vector<blas::Side> const& side,
1165 std::vector<blas::Uplo> const& uplo,
1166 std::vector<blas::Op> const& trans,
1167 std::vector<blas::Diag> const& diag,
1168 std::vector<int64_t> const& m,
1169 std::vector<int64_t> const& n,
1170 std::vector<float > const& alpha,
1171 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
1172 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
1173 size_t batch_size,
1174 std::vector<int64_t>& info,
1175 blas::Queue& queue );
1176
// GPU device, variable-size batched trmm, double version.
// layout is one value for the whole call; side, uplo, trans, diag, m, n,
// alpha and the lda/ldb leading dimensions are passed as vectors (presumably
// one entry per problem in the batch -- TODO confirm against the
// implementation).
// Aarray and Barray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1177void trmm(
1178 blas::Layout layout,
1179 std::vector<blas::Side> const& side,
1180 std::vector<blas::Uplo> const& uplo,
1181 std::vector<blas::Op> const& trans,
1182 std::vector<blas::Diag> const& diag,
1183 std::vector<int64_t> const& m,
1184 std::vector<int64_t> const& n,
1185 std::vector<double > const& alpha,
1186 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1187 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
1188 size_t batch_size,
1189 std::vector<int64_t>& info,
1190 blas::Queue& queue );
1191
// GPU device, variable-size batched trmm, std::complex<float> version.
// layout is one value for the whole call; side, uplo, trans, diag, m, n,
// alpha and the lda/ldb leading dimensions are passed as vectors (presumably
// one entry per problem in the batch -- TODO confirm against the
// implementation).
// Aarray and Barray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1192void trmm(
1193 blas::Layout layout,
1194 std::vector<blas::Side> const& side,
1195 std::vector<blas::Uplo> const& uplo,
1196 std::vector<blas::Op> const& trans,
1197 std::vector<blas::Diag> const& diag,
1198 std::vector<int64_t> const& m,
1199 std::vector<int64_t> const& n,
1200 std::vector< std::complex<float> > const& alpha,
1201 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1202 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
1203 size_t batch_size,
1204 std::vector<int64_t>& info,
1205 blas::Queue& queue );
1206
// GPU device, variable-size batched trmm, std::complex<double> version.
// layout is one value for the whole call; side, uplo, trans, diag, m, n,
// alpha and the lda/ldb leading dimensions are passed as vectors (presumably
// one entry per problem in the batch -- TODO confirm against the
// implementation).
// Aarray and Barray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1207void trmm(
1208 blas::Layout layout,
1209 std::vector<blas::Side> const& side,
1210 std::vector<blas::Uplo> const& uplo,
1211 std::vector<blas::Op> const& trans,
1212 std::vector<blas::Diag> const& diag,
1213 std::vector<int64_t> const& m,
1214 std::vector<int64_t> const& n,
1215 std::vector< std::complex<double> > const& alpha,
1216 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1217 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
1218 size_t batch_size,
1219 std::vector<int64_t>& info,
1220 blas::Queue& queue );
1221
1222//------------------------------------------------------------------------------
1223// batch trsm
// GPU device, variable-size batched trsm, float version.
// layout is one value for the whole call; side, uplo, trans, diag, m, n,
// alpha and the lda/ldb leading dimensions are passed as vectors (presumably
// one entry per problem in the batch -- TODO confirm against the
// implementation).
// Aarray and Barray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1224void trsm(
1225 blas::Layout layout,
1226 std::vector<blas::Side> const& side,
1227 std::vector<blas::Uplo> const& uplo,
1228 std::vector<blas::Op> const& trans,
1229 std::vector<blas::Diag> const& diag,
1230 std::vector<int64_t> const& m,
1231 std::vector<int64_t> const& n,
1232 std::vector<float > const& alpha,
1233 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
1234 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
1235 size_t batch_size,
1236 std::vector<int64_t>& info,
1237 blas::Queue& queue );
1238
// GPU device, variable-size batched trsm, double version.
// layout is one value for the whole call; side, uplo, trans, diag, m, n,
// alpha and the lda/ldb leading dimensions are passed as vectors (presumably
// one entry per problem in the batch -- TODO confirm against the
// implementation).
// Aarray and Barray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1239void trsm(
1240 blas::Layout layout,
1241 std::vector<blas::Side> const& side,
1242 std::vector<blas::Uplo> const& uplo,
1243 std::vector<blas::Op> const& trans,
1244 std::vector<blas::Diag> const& diag,
1245 std::vector<int64_t> const& m,
1246 std::vector<int64_t> const& n,
1247 std::vector<double > const& alpha,
1248 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1249 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
1250 size_t batch_size,
1251 std::vector<int64_t>& info,
1252 blas::Queue& queue );
1253
// GPU device, variable-size batched trsm, std::complex<float> version.
// layout is one value for the whole call; side, uplo, trans, diag, m, n,
// alpha and the lda/ldb leading dimensions are passed as vectors (presumably
// one entry per problem in the batch -- TODO confirm against the
// implementation).
// Aarray and Barray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1254void trsm(
1255 blas::Layout layout,
1256 std::vector<blas::Side> const& side,
1257 std::vector<blas::Uplo> const& uplo,
1258 std::vector<blas::Op> const& trans,
1259 std::vector<blas::Diag> const& diag,
1260 std::vector<int64_t> const& m,
1261 std::vector<int64_t> const& n,
1262 std::vector< std::complex<float> > const& alpha,
1263 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1264 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
1265 size_t batch_size,
1266 std::vector<int64_t>& info,
1267 blas::Queue& queue );
1268
// GPU device, variable-size batched trsm, std::complex<double> version.
// layout is one value for the whole call; side, uplo, trans, diag, m, n,
// alpha and the lda/ldb leading dimensions are passed as vectors (presumably
// one entry per problem in the batch -- TODO confirm against the
// implementation).
// Aarray and Barray are vectors of matrix pointers.
// info is the only non-const vector, so it is the one the routine can write
// to (exact contents not visible from this declaration).
// queue is the blas::Queue on which the GPU routine is executed.
1269void trsm(
1270 blas::Layout layout,
1271 std::vector<blas::Side> const& side,
1272 std::vector<blas::Uplo> const& uplo,
1273 std::vector<blas::Op> const& trans,
1274 std::vector<blas::Diag> const& diag,
1275 std::vector<int64_t> const& m,
1276 std::vector<int64_t> const& n,
1277 std::vector< std::complex<double> > const& alpha,
1278 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1279 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
1280 size_t batch_size,
1281 std::vector<int64_t>& info,
1282 blas::Queue& queue );
1283
1284} // namespace batch
1285} // namespace blas
Queue for executing GPU device routines.
Definition device.hh:143
void axpy(int64_t n, blas::scalar_type< TX, TY > alpha, TX const *x, int64_t incx, TY *y, int64_t incy)
Add scaled vector, $y = \alpha x + y$.
Definition axpy.hh:43
void copy(int64_t n, TX const *x, int64_t incx, TY *y, int64_t incy)
Copy vector, $y = x$.
Definition copy.hh:40
void dot(int64_t n, float const *x, int64_t incx, float const *y, int64_t incy, float *result, blas::Queue &queue)
GPU device, float version.
Definition device_dot.cc:139
void dotu(int64_t n, float const *x, int64_t incx, float const *y, int64_t incy, float *result, blas::Queue &queue)
GPU device, float, unconjugated x^T y version.
Definition device_dot.cc:194
void gemm(blas::Layout layout, std::vector< blas::Op > const &transA, std::vector< blas::Op > const &transB, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_gemm.cc:163
void gemm(blas::Layout layout, blas::Op transA, blas::Op transB, int64_t m, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_gemm.cc:119
void hemm(blas::Layout layout, blas::Side side, blas::Uplo uplo, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_hemm.cc:107
void hemm(blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_hemm.cc:102
void her2k(blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_her2k.cc:89
void her2k(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_her2k.cc:100
void herk(blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_herk.cc:87
void herk(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_herk.cc:92
void nrm2(int64_t n, float const *x, int64_t incx, float *result, blas::Queue &queue)
GPU device, float version.
Definition device_nrm2.cc:84
void scal(int64_t n, float alpha, float *x, int64_t incx, blas::Queue &queue)
GPU device, float version.
Definition device_scal.cc:65
void swap(int64_t n, float *x, int64_t incx, float *y, int64_t incy, blas::Queue &queue)
GPU device, float version.
Definition device_swap.cc:67
void symm(blas::Layout layout, blas::Side side, blas::Uplo uplo, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_symm.cc:106
void symm(blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_symm.cc:87
void syr2k(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_syr2k.cc:107
void syr2k(blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_syr2k.cc:87
void syrk(blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_syrk.cc:84
void syrk(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_syrk.cc:101
void trmm(blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float *B, int64_t ldb, blas::Queue &queue)
GPU device, float version.
Definition device_trmm.cc:104
void trmm(blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_trmm.cc:95
void trsm(blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_trsm.cc:145
void trsm(blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float *B, int64_t ldb, blas::Queue &queue)
GPU device, float version.
Definition device_trsm.cc:104