BLAS++ 2024.05.31
BLAS C++ API
Loading...
Searching...
No Matches
wrappers.hh
1// Copyright (c) 2017-2023, University of Tennessee. All rights reserved.
2// SPDX-License-Identifier: BSD-3-Clause
3// This program is free software: you can redistribute it and/or modify it under
4// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
5
6#include <vector>
7
8#include "blas/util.hh"
9
10namespace blas {
11
12#ifndef BLAS_USE_TEMPLATE
13
14//==============================================================================
15// Level 1 BLAS
16// Alphabetical order
17
18//------------------------------------------------------------------------------
// asum: Level 1 wrapper, overloaded for float, double, std::complex<float>,
// and std::complex<double>.
// x is read-only. The complex overloads return the real type of the same
// precision (float / double), not a complex value.
// NOTE(review): presumably reference-BLAS xASUM semantics, with incx as the
// stride of x -- the semantics are not visible in this header; confirm.
19float asum(
20 int64_t n,
21 float const* x, int64_t incx );
22
23double asum(
24 int64_t n,
25 double const* x, int64_t incx );
26
27float asum(
28 int64_t n,
29 std::complex<float> const* x, int64_t incx );
30
31double asum(
32 int64_t n,
33 std::complex<double> const* x, int64_t incx );
34
35//------------------------------------------------------------------------------
// axpy: Level 1 wrapper, one overload per element type.
// alpha is passed by value and has the same type as the vector elements;
// x is read-only and y is passed through a mutable pointer.
// NOTE(review): presumably y = alpha*x + y as in reference BLAS xAXPY, with
// incx/incy as strides -- confirm against the implementation.
36void axpy(
37 int64_t n,
38 float alpha,
39 float const* x, int64_t incx,
40 float* y, int64_t incy );
41
42void axpy(
43 int64_t n,
44 double alpha,
45 double const* x, int64_t incx,
46 double* y, int64_t incy );
47
48void axpy(
49 int64_t n,
50 std::complex<float> alpha,
51 std::complex<float> const* x, int64_t incx,
52 std::complex<float>* y, int64_t incy );
53
54void axpy(
55 int64_t n,
56 std::complex<double> alpha,
57 std::complex<double> const* x, int64_t incx,
58 std::complex<double>* y, int64_t incy );
59
60//------------------------------------------------------------------------------
// copy: Level 1 wrapper, one overload per element type.
// x is read-only (source) and y is passed through a mutable pointer
// (destination); both vectors share the element type.
// NOTE(review): presumably reference-BLAS xCOPY semantics with incx/incy as
// strides -- confirm.
61void copy(
62 int64_t n,
63 float const* x, int64_t incx,
64 float* y, int64_t incy );
65
66void copy(
67 int64_t n,
68 double const* x, int64_t incx,
69 double* y, int64_t incy );
70
71void copy(
72 int64_t n,
73 std::complex<float> const* x, int64_t incx,
74 std::complex<float>* y, int64_t incy );
75
76void copy(
77 int64_t n,
78 std::complex<double> const* x, int64_t incx,
79 std::complex<double>* y, int64_t incy );
80
81//------------------------------------------------------------------------------
// dot: Level 1 wrapper, one overload per element type.
// Both x and y are read-only; the result is returned by value and has the
// same type as the vector elements (complex in, complex out).
// NOTE(review): this header does not show how dot differs from dotu;
// presumably dot conjugates x for complex types (xDOTC) -- confirm.
82float dot(
83 int64_t n,
84 float const* x, int64_t incx,
85 float const* y, int64_t incy );
86
87double dot(
88 int64_t n,
89 double const* x, int64_t incx,
90 double const* y, int64_t incy );
91
92std::complex<float> dot(
93 int64_t n,
94 std::complex<float> const* x, int64_t incx,
95 std::complex<float> const* y, int64_t incy );
96
97std::complex<double> dot(
98 int64_t n,
99 std::complex<double> const* x, int64_t incx,
100 std::complex<double> const* y, int64_t incy );
101
102//------------------------------------------------------------------------------
// dotu: Level 1 wrapper with the same parameter list and return types as
// dot, declared for all four element types (including the real ones).
// Both x and y are read-only; the result is returned by value.
// NOTE(review): presumably the unconjugated product (xDOTU), which for real
// types would coincide with dot -- confirm against the implementation.
103float dotu(
104 int64_t n,
105 float const* x, int64_t incx,
106 float const* y, int64_t incy );
107
108double dotu(
109 int64_t n,
110 double const* x, int64_t incx,
111 double const* y, int64_t incy );
112
113std::complex<float> dotu(
114 int64_t n,
115 std::complex<float> const* x, int64_t incx,
116 std::complex<float> const* y, int64_t incy );
117
118std::complex<double> dotu(
119 int64_t n,
120 std::complex<double> const* x, int64_t incx,
121 std::complex<double> const* y, int64_t incy );
122
123//------------------------------------------------------------------------------
// iamax: Level 1 wrapper, one overload per element type.
// x is read-only; every overload returns an int64_t regardless of the
// element type.
// NOTE(review): presumably the index of the entry of largest magnitude
// (IxAMAX); whether the index is 0- or 1-based is not visible here -- confirm.
124int64_t iamax(
125 int64_t n,
126 float const* x, int64_t incx );
127
128int64_t iamax(
129 int64_t n,
130 double const* x, int64_t incx );
131
132int64_t iamax(
133 int64_t n,
134 std::complex<float> const* x, int64_t incx );
135
136int64_t iamax(
137 int64_t n,
138 std::complex<double> const* x, int64_t incx );
139
140//------------------------------------------------------------------------------
// nrm2: Level 1 wrapper, one overload per element type.
// x is read-only. The complex overloads return the real type of the same
// precision (float / double), not a complex value.
// NOTE(review): presumably the Euclidean norm as in reference BLAS xNRM2,
// with incx as the stride of x -- confirm.
141float nrm2(
142 int64_t n,
143 float const* x, int64_t incx );
144
145double nrm2(
146 int64_t n,
147 double const* x, int64_t incx );
148
149float nrm2(
150 int64_t n,
151 std::complex<float> const* x, int64_t incx );
152
153double nrm2(
154 int64_t n,
155 std::complex<double> const* x, int64_t incx );
156
157//------------------------------------------------------------------------------
// rot: Level 1 wrapper with six overloads in three pairs (single / double):
//   1. real vectors, real c and real s;
//   2. complex vectors, real c and real s;
//   3. complex vectors, real c and complex s.
// Both x and y are passed through mutable pointers; c and s are by value.
// NOTE(review): presumably applies a plane rotation to (x, y) in place as in
// reference BLAS xROT -- confirm against the implementation.

// real vectors; real c, real s
158void rot(
159 int64_t n,
160 float* x, int64_t incx,
161 float* y, int64_t incy,
162 float c,
163 float s );
164
165void rot(
166 int64_t n,
167 double* x, int64_t incx,
168 double* y, int64_t incy,
169 double c,
170 double s );
171
// complex vectors; real c, real s
175void rot(
176 int64_t n,
177 std::complex<float>* x, int64_t incx,
178 std::complex<float>* y, int64_t incy,
179 float c,
180 float s );
181
185void rot(
186 int64_t n,
187 std::complex<double>* x, int64_t incx,
188 std::complex<double>* y, int64_t incy,
189 double c,
190 double s );
191
// complex vectors; real c, complex s
195void rot(
196 int64_t n,
197 std::complex<float>* x, int64_t incx,
198 std::complex<float>* y, int64_t incy,
199 float c,
200 std::complex<float> s );
201
205void rot(
206 int64_t n,
207 std::complex<double>* x, int64_t incx,
208 std::complex<double>* y, int64_t incy,
209 double c,
210 std::complex<double> s );
211
212//------------------------------------------------------------------------------
// rotg: Level 1 wrapper, one overload per element type.
// All four arguments are mutable pointers to single scalars (no length or
// stride parameters). In the complex overloads a, b, and s are complex
// while c stays real (float / double).
// NOTE(review): presumably generates a plane rotation as in reference BLAS
// xROTG, writing results back through the pointers -- confirm.
213void rotg(
214 float* a,
215 float* b,
216 float* c,
217 float* s );
218
219void rotg(
220 double* a,
221 double* b,
222 double* c,
223 double* s );
224
225void rotg(
226 std::complex<float>* a,
227 std::complex<float>* b, // const in BLAS implementation, oddly
228 float* c,
229 std::complex<float>* s );
230
231void rotg(
232 std::complex<double>* a,
233 std::complex<double>* b, // const in BLAS implementation, oddly
234 double* c,
235 std::complex<double>* s );
236
237//------------------------------------------------------------------------------
238// only real
// rotm: Level 1 wrapper declared for float and double only.
// x and y are passed through mutable pointers; param is read-only.
// The [5] in "param[5]" is documentation only: as a function parameter the
// array decays to a pointer, so the compiler does not check the length.
// NOTE(review): presumably applies the modified Givens rotation described by
// param, as in reference BLAS xROTM -- confirm.
239void rotm(
240 int64_t n,
241 float* x, int64_t incx,
242 float* y, int64_t incy,
243 float const param[5] );
244
245void rotm(
246 int64_t n,
247 double* x, int64_t incx,
248 double* y, int64_t incy,
249 double const param[5] );
250
251//------------------------------------------------------------------------------
252// only real
// rotmg: Level 1 wrapper declared for float and double only.
// d1, d2, and a are mutable pointers to single scalars, b is passed by
// value, and param is a mutable array parameter (the [5] is documentation
// only; the parameter decays to a pointer).
// NOTE(review): presumably constructs the modified Givens rotation and
// stores it in param, as in reference BLAS xROTMG -- confirm.
253void rotmg(
254 float* d1,
255 float* d2,
256 float* a,
257 float b,
258 float param[5] );
259
260void rotmg(
261 double* d1,
262 double* d2,
263 double* a,
264 double b,
265 double param[5] );
266
267//------------------------------------------------------------------------------
// scal: Level 1 wrapper, one overload per element type.
// alpha is passed by value and has the same type as the vector elements
// (complex alpha for complex x); x is passed through a mutable pointer.
// NOTE(review): presumably x = alpha*x in place as in reference BLAS xSCAL,
// with incx as the stride of x -- confirm.
268void scal(
269 int64_t n,
270 float alpha,
271 float* x, int64_t incx );
272
273void scal(
274 int64_t n,
275 double alpha,
276 double* x, int64_t incx );
277
278void scal(
279 int64_t n,
280 std::complex<float> alpha,
281 std::complex<float>* x, int64_t incx );
282
283void scal(
284 int64_t n,
285 std::complex<double> alpha,
286 std::complex<double>* x, int64_t incx );
287
288//------------------------------------------------------------------------------
// swap: Level 1 wrapper, one overload per element type.
// Both x and y are passed through mutable pointers and share the element
// type.
// NOTE(review): presumably exchanges the contents of x and y as in
// reference BLAS xSWAP, with incx/incy as strides -- confirm.
289void swap(
290 int64_t n,
291 float* x, int64_t incx,
292 float* y, int64_t incy );
293
294void swap(
295 int64_t n,
296 double* x, int64_t incx,
297 double* y, int64_t incy );
298
299void swap(
300 int64_t n,
301 std::complex<float>* x, int64_t incx,
302 std::complex<float>* y, int64_t incy );
303
304void swap(
305 int64_t n,
306 std::complex<double>* x, int64_t incx,
307 std::complex<double>* y, int64_t incy );
308
309//==============================================================================
310// Level 2 BLAS
311
312//------------------------------------------------------------------------------
// gemv: Level 2 wrapper, one overload per element type.
// Takes a storage layout, an operation selector for A, dimensions m and n,
// scalars alpha and beta (same type as the elements), read-only A (with
// lda) and x, and y through a mutable pointer.
// NOTE(review): presumably y = alpha*op(A)*x + beta*y as in reference BLAS
// xGEMV, with lda the leading dimension of A -- confirm.
313void gemv(
314 blas::Layout layout,
315 blas::Op trans,
316 int64_t m, int64_t n,
317 float alpha,
318 float const* A, int64_t lda,
319 float const* x, int64_t incx,
320 float beta,
321 float* y, int64_t incy );
322
323void gemv(
324 blas::Layout layout,
325 blas::Op trans,
326 int64_t m, int64_t n,
327 double alpha,
328 double const* A, int64_t lda,
329 double const* x, int64_t incx,
330 double beta,
331 double* y, int64_t incy );
332
333void gemv(
334 blas::Layout layout,
335 blas::Op trans,
336 int64_t m, int64_t n,
337 std::complex<float> alpha,
338 std::complex<float> const* A, int64_t lda,
339 std::complex<float> const* x, int64_t incx,
340 std::complex<float> beta,
341 std::complex<float>* y, int64_t incy );
342
343void gemv(
344 blas::Layout layout,
345 blas::Op trans,
346 int64_t m, int64_t n,
347 std::complex<double> alpha,
348 std::complex<double> const* A, int64_t lda,
349 std::complex<double> const* x, int64_t incx,
350 std::complex<double> beta,
351 std::complex<double>* y, int64_t incy );
352
353//------------------------------------------------------------------------------
// ger: Level 2 wrapper, one overload per element type.
// x and y are read-only vectors; A (with lda) is passed through a mutable
// pointer. alpha has the same type as the elements.
// NOTE(review): presumably the rank-1 update of A from x and y; this header
// does not show how ger differs from geru -- presumably ger conjugates y
// for complex types (xGERC). Confirm against the implementation.
354void ger(
355 blas::Layout layout,
356 int64_t m, int64_t n,
357 float alpha,
358 float const* x, int64_t incx,
359 float const* y, int64_t incy,
360 float* A, int64_t lda );
361
362void ger(
363 blas::Layout layout,
364 int64_t m, int64_t n,
365 double alpha,
366 double const* x, int64_t incx,
367 double const* y, int64_t incy,
368 double* A, int64_t lda );
369
370void ger(
371 blas::Layout layout,
372 int64_t m, int64_t n,
373 std::complex<float> alpha,
374 std::complex<float> const* x, int64_t incx,
375 std::complex<float> const* y, int64_t incy,
376 std::complex<float>* A, int64_t lda );
377
378void ger(
379 blas::Layout layout,
380 int64_t m, int64_t n,
381 std::complex<double> alpha,
382 std::complex<double> const* x, int64_t incx,
383 std::complex<double> const* y, int64_t incy,
384 std::complex<double>* A, int64_t lda );
385
386//------------------------------------------------------------------------------
// geru: Level 2 wrapper with the same parameter list as ger, declared for
// all four element types (including the real ones).
// x and y are read-only vectors; A (with lda) is passed through a mutable
// pointer.
// NOTE(review): presumably the unconjugated rank-1 update (xGERU), which
// for real types would coincide with ger -- confirm.
387void geru(
388 blas::Layout layout,
389 int64_t m, int64_t n,
390 float alpha,
391 float const* x, int64_t incx,
392 float const* y, int64_t incy,
393 float* A, int64_t lda );
394
395void geru(
396 blas::Layout layout,
397 int64_t m, int64_t n,
398 double alpha,
399 double const* x, int64_t incx,
400 double const* y, int64_t incy,
401 double* A, int64_t lda );
402
403void geru(
404 blas::Layout layout,
405 int64_t m, int64_t n,
406 std::complex<float> alpha,
407 std::complex<float> const* x, int64_t incx,
408 std::complex<float> const* y, int64_t incy,
409 std::complex<float>* A, int64_t lda );
410
411void geru(
412 blas::Layout layout,
413 int64_t m, int64_t n,
414 std::complex<double> alpha,
415 std::complex<double> const* x, int64_t incx,
416 std::complex<double> const* y, int64_t incy,
417 std::complex<double>* A, int64_t lda );
418
419//------------------------------------------------------------------------------
// hemv: Level 2 wrapper, declared for all four element types (including
// float and double).
// Takes a layout, a Uplo selector, the order n, scalars alpha and beta
// (same type as the elements), read-only A (with lda) and x, and y through
// a mutable pointer.
// NOTE(review): presumably y = alpha*A*x + beta*y with A Hermitian and only
// the uplo triangle referenced, as in reference BLAS xHEMV -- confirm.
420void hemv(
421 blas::Layout layout,
422 blas::Uplo uplo,
423 int64_t n,
424 float alpha,
425 float const* A, int64_t lda,
426 float const* x, int64_t incx,
427 float beta,
428 float* y, int64_t incy );
429
430void hemv(
431 blas::Layout layout,
432 blas::Uplo uplo,
433 int64_t n,
434 double alpha,
435 double const* A, int64_t lda,
436 double const* x, int64_t incx,
437 double beta,
438 double* y, int64_t incy );
439
440void hemv(
441 blas::Layout layout,
442 blas::Uplo uplo,
443 int64_t n,
444 std::complex<float> alpha,
445 std::complex<float> const* A, int64_t lda,
446 std::complex<float> const* x, int64_t incx,
447 std::complex<float> beta,
448 std::complex<float>* y, int64_t incy );
449
450void hemv(
451 blas::Layout layout,
452 blas::Uplo uplo,
453 int64_t n,
454 std::complex<double> alpha,
455 std::complex<double> const* A, int64_t lda,
456 std::complex<double> const* x, int64_t incx,
457 std::complex<double> beta,
458 std::complex<double>* y, int64_t incy );
459
460//------------------------------------------------------------------------------
// her: Level 2 wrapper, declared for all four element types.
// x is read-only; A (with lda) is passed through a mutable pointer.
// alpha is a real scalar in every overload, including the complex ones
// (float for complex<float>, double for complex<double>).
// NOTE(review): presumably the Hermitian rank-1 update of the uplo triangle
// of A, as in reference BLAS xHER -- confirm.
461void her(
462 blas::Layout layout,
463 blas::Uplo uplo,
464 int64_t n,
465 float alpha,
466 float const* x, int64_t incx,
467 float* A, int64_t lda );
468
469void her(
470 blas::Layout layout,
471 blas::Uplo uplo,
472 int64_t n,
473 double alpha,
474 double const* x, int64_t incx,
475 double* A, int64_t lda );
476
477void her(
478 blas::Layout layout,
479 blas::Uplo uplo,
480 int64_t n,
481 float alpha, // note: real
482 std::complex<float> const* x, int64_t incx,
483 std::complex<float>* A, int64_t lda );
484
485void her(
486 blas::Layout layout,
487 blas::Uplo uplo,
488 int64_t n,
489 double alpha, // note: real
490 std::complex<double> const* x, int64_t incx,
491 std::complex<double>* A, int64_t lda );
492
493//------------------------------------------------------------------------------
// her2: Level 2 wrapper, declared for all four element types.
// x and y are read-only; A (with lda) is passed through a mutable pointer.
// Unlike her, alpha has the same type as the elements, so it is complex in
// the complex overloads.
// NOTE(review): presumably the Hermitian rank-2 update of the uplo triangle
// of A, as in reference BLAS xHER2 -- confirm.
494void her2(
495 blas::Layout layout,
496 blas::Uplo uplo,
497 int64_t n,
498 float alpha,
499 float const* x, int64_t incx,
500 float const* y, int64_t incy,
501 float* A, int64_t lda );
502
503void her2(
504 blas::Layout layout,
505 blas::Uplo uplo,
506 int64_t n,
507 double alpha,
508 double const* x, int64_t incx,
509 double const* y, int64_t incy,
510 double* A, int64_t lda );
511
512void her2(
513 blas::Layout layout,
514 blas::Uplo uplo,
515 int64_t n,
516 std::complex<float> alpha,
517 std::complex<float> const* x, int64_t incx,
518 std::complex<float> const* y, int64_t incy,
519 std::complex<float>* A, int64_t lda );
520
521void her2(
522 blas::Layout layout,
523 blas::Uplo uplo,
524 int64_t n,
525 std::complex<double> alpha,
526 std::complex<double> const* x, int64_t incx,
527 std::complex<double> const* y, int64_t incy,
528 std::complex<double>* A, int64_t lda );
529
530//------------------------------------------------------------------------------
// symv: Level 2 wrapper, declared for all four element types (complex
// overloads are declared here as well).
// Same parameter list as hemv: layout, uplo, n, alpha, read-only A (with
// lda) and x, beta, and y through a mutable pointer.
// NOTE(review): presumably y = alpha*A*x + beta*y with A symmetric and only
// the uplo triangle referenced, as in reference BLAS xSYMV -- confirm.
531void symv(
532 blas::Layout layout,
533 blas::Uplo uplo,
534 int64_t n,
535 float alpha,
536 float const* A, int64_t lda,
537 float const* x, int64_t incx,
538 float beta,
539 float* y, int64_t incy );
540
541void symv(
542 blas::Layout layout,
543 blas::Uplo uplo,
544 int64_t n,
545 double alpha,
546 double const* A, int64_t lda,
547 double const* x, int64_t incx,
548 double beta,
549 double* y, int64_t incy );
550
551void symv(
552 blas::Layout layout,
553 blas::Uplo uplo,
554 int64_t n,
555 std::complex<float> alpha,
556 std::complex<float> const* A, int64_t lda,
557 std::complex<float> const* x, int64_t incx,
558 std::complex<float> beta,
559 std::complex<float>* y, int64_t incy );
560
561void symv(
562 blas::Layout layout,
563 blas::Uplo uplo,
564 int64_t n,
565 std::complex<double> alpha,
566 std::complex<double> const* A, int64_t lda,
567 std::complex<double> const* x, int64_t incx,
568 std::complex<double> beta,
569 std::complex<double>* y, int64_t incy );
570
571//------------------------------------------------------------------------------
572// only real; complex in lapack++
// syr: Level 2 wrapper declared here for float and double only.
// x is read-only; A (with lda) is passed through a mutable pointer.
// NOTE(review): presumably the symmetric rank-1 update of the uplo triangle
// of A, as in reference BLAS xSYR -- confirm.
573void syr(
574 blas::Layout layout,
575 blas::Uplo uplo,
576 int64_t n,
577 float alpha,
578 float const* x, int64_t incx,
579 float* A, int64_t lda );
580
581void syr(
582 blas::Layout layout,
583 blas::Uplo uplo,
584 int64_t n,
585 double alpha,
586 double const* x, int64_t incx,
587 double* A, int64_t lda );
588
589//------------------------------------------------------------------------------
// syr2: Level 2 wrapper, declared for all four element types (unlike syr,
// complex overloads are declared in this header).
// x and y are read-only; A (with lda) is passed through a mutable pointer.
// alpha has the same type as the elements.
// NOTE(review): presumably the symmetric rank-2 update of the uplo triangle
// of A, as in reference BLAS xSYR2 -- confirm.
590void syr2(
591 blas::Layout layout,
592 blas::Uplo uplo,
593 int64_t n,
594 float alpha,
595 float const* x, int64_t incx,
596 float const* y, int64_t incy,
597 float* A, int64_t lda );
598
599void syr2(
600 blas::Layout layout,
601 blas::Uplo uplo,
602 int64_t n,
603 double alpha,
604 double const* x, int64_t incx,
605 double const* y, int64_t incy,
606 double* A, int64_t lda );
607
608void syr2(
609 blas::Layout layout,
610 blas::Uplo uplo,
611 int64_t n,
612 std::complex<float> alpha,
613 std::complex<float> const* x, int64_t incx,
614 std::complex<float> const* y, int64_t incy,
615 std::complex<float>* A, int64_t lda );
616
617void syr2(
618 blas::Layout layout,
619 blas::Uplo uplo,
620 int64_t n,
621 std::complex<double> alpha,
622 std::complex<double> const* x, int64_t incx,
623 std::complex<double> const* y, int64_t incy,
624 std::complex<double>* A, int64_t lda );
625
626//------------------------------------------------------------------------------
// trmv: Level 2 wrapper, one overload per element type.
// Takes layout, uplo, trans, and diag selectors, the order n, read-only A
// (with lda), and x through a mutable pointer. There are no alpha/beta
// scalars and no separate output vector.
// NOTE(review): presumably x = op(A)*x in place with A triangular, as in
// reference BLAS xTRMV -- confirm.
627void trmv(
628 blas::Layout layout,
629 blas::Uplo uplo,
630 blas::Op trans,
631 blas::Diag diag,
632 int64_t n,
633 float const* A, int64_t lda,
634 float* x, int64_t incx );
635
636void trmv(
637 blas::Layout layout,
638 blas::Uplo uplo,
639 blas::Op trans,
640 blas::Diag diag,
641 int64_t n,
642 double const* A, int64_t lda,
643 double* x, int64_t incx );
644
645void trmv(
646 blas::Layout layout,
647 blas::Uplo uplo,
648 blas::Op trans,
649 blas::Diag diag,
650 int64_t n,
651 std::complex<float> const* A, int64_t lda,
652 std::complex<float>* x, int64_t incx );
653
654void trmv(
655 blas::Layout layout,
656 blas::Uplo uplo,
657 blas::Op trans,
658 blas::Diag diag,
659 int64_t n,
660 std::complex<double> const* A, int64_t lda,
661 std::complex<double>* x, int64_t incx );
662
663//------------------------------------------------------------------------------
// trsv: Level 2 wrapper with the same parameter list as trmv, one overload
// per element type.
// A (with lda) is read-only; x is passed through a mutable pointer and is
// the only writable argument.
// NOTE(review): presumably solves op(A)*x = b in place (b supplied in x)
// with A triangular, as in reference BLAS xTRSV -- confirm.
664void trsv(
665 blas::Layout layout,
666 blas::Uplo uplo,
667 blas::Op trans,
668 blas::Diag diag,
669 int64_t n,
670 float const* A, int64_t lda,
671 float* x, int64_t incx );
672
673void trsv(
674 blas::Layout layout,
675 blas::Uplo uplo,
676 blas::Op trans,
677 blas::Diag diag,
678 int64_t n,
679 double const* A, int64_t lda,
680 double* x, int64_t incx );
681
682void trsv(
683 blas::Layout layout,
684 blas::Uplo uplo,
685 blas::Op trans,
686 blas::Diag diag,
687 int64_t n,
688 std::complex<float> const* A, int64_t lda,
689 std::complex<float>* x, int64_t incx );
690
691void trsv(
692 blas::Layout layout,
693 blas::Uplo uplo,
694 blas::Op trans,
695 blas::Diag diag,
696 int64_t n,
697 std::complex<double> const* A, int64_t lda,
698 std::complex<double>* x, int64_t incx );
699
700//==============================================================================
701// Level 3 BLAS
702
703//------------------------------------------------------------------------------
// gemm: Level 3 wrapper, one overload per element type.
// Takes a layout, separate operation selectors for A and B, dimensions
// m, n, k, scalars alpha and beta (same type as the elements), read-only
// A and B (with lda, ldb), and C (with ldc) through a mutable pointer.
// NOTE(review): presumably C = alpha*op(A)*op(B) + beta*C as in reference
// BLAS xGEMM, with ld* the leading dimensions -- confirm.
704void gemm(
705 blas::Layout layout,
706 blas::Op transA,
707 blas::Op transB,
708 int64_t m, int64_t n, int64_t k,
709 float alpha,
710 float const* A, int64_t lda,
711 float const* B, int64_t ldb,
712 float beta,
713 float* C, int64_t ldc );
714
715void gemm(
716 blas::Layout layout,
717 blas::Op transA,
718 blas::Op transB,
719 int64_t m, int64_t n, int64_t k,
720 double alpha,
721 double const* A, int64_t lda,
722 double const* B, int64_t ldb,
723 double beta,
724 double* C, int64_t ldc );
725
726void gemm(
727 blas::Layout layout,
728 blas::Op transA,
729 blas::Op transB,
730 int64_t m, int64_t n, int64_t k,
731 std::complex<float> alpha,
732 std::complex<float> const* A, int64_t lda,
733 std::complex<float> const* B, int64_t ldb,
734 std::complex<float> beta,
735 std::complex<float>* C, int64_t ldc );
736
737void gemm(
738 blas::Layout layout,
739 blas::Op transA,
740 blas::Op transB,
741 int64_t m, int64_t n, int64_t k,
742 std::complex<double> alpha,
743 std::complex<double> const* A, int64_t lda,
744 std::complex<double> const* B, int64_t ldb,
745 std::complex<double> beta,
746 std::complex<double>* C, int64_t ldc );
747
748//------------------------------------------------------------------------------
// hemm: Level 3 wrapper, declared for all four element types (including
// float and double).
// Takes layout, side, and uplo selectors, dimensions m and n, scalars alpha
// and beta (same type as the elements), read-only A and B (with lda, ldb),
// and C (with ldc) through a mutable pointer.
// NOTE(review): presumably C = alpha*A*B + beta*C or alpha*B*A + beta*C
// depending on side, with A Hermitian, as in reference BLAS xHEMM -- confirm.
749void hemm(
750 blas::Layout layout,
751 blas::Side side,
752 blas::Uplo uplo,
753 int64_t m, int64_t n,
754 float alpha,
755 float const* A, int64_t lda,
756 float const* B, int64_t ldb,
757 float beta,
758 float* C, int64_t ldc );
759
760void hemm(
761 blas::Layout layout,
762 blas::Side side,
763 blas::Uplo uplo,
764 int64_t m, int64_t n,
765 double alpha,
766 double const* A, int64_t lda,
767 double const* B, int64_t ldb,
768 double beta,
769 double* C, int64_t ldc );
770
771void hemm(
772 blas::Layout layout,
773 blas::Side side,
774 blas::Uplo uplo,
775 int64_t m, int64_t n,
776 std::complex<float> alpha,
777 std::complex<float> const* A, int64_t lda,
778 std::complex<float> const* B, int64_t ldb,
779 std::complex<float> beta,
780 std::complex<float>* C, int64_t ldc );
781
782void hemm(
783 blas::Layout layout,
784 blas::Side side,
785 blas::Uplo uplo,
786 int64_t m, int64_t n,
787 std::complex<double> alpha,
788 std::complex<double> const* A, int64_t lda,
789 std::complex<double> const* B, int64_t ldb,
790 std::complex<double> beta,
791 std::complex<double>* C, int64_t ldc );
792
793//------------------------------------------------------------------------------
// her2k: Level 3 wrapper, declared for all four element types.
// Takes layout, uplo, and trans selectors, dimensions n and k, read-only
// A and B (with lda, ldb), and C (with ldc) through a mutable pointer.
// In the complex overloads the scalar types are mixed: alpha is complex
// while beta is real, as marked on the parameters below.
// NOTE(review): presumably the Hermitian rank-2k update of the uplo
// triangle of C, as in reference BLAS xHER2K -- confirm.
794void her2k(
795 blas::Layout layout,
796 blas::Uplo uplo,
797 blas::Op trans,
798 int64_t n, int64_t k,
799 float alpha,
800 float const* A, int64_t lda,
801 float const* B, int64_t ldb,
802 float beta,
803 float* C, int64_t ldc );
804
805void her2k(
806 blas::Layout layout,
807 blas::Uplo uplo,
808 blas::Op trans,
809 int64_t n, int64_t k,
810 double alpha,
811 double const* A, int64_t lda,
812 double const* B, int64_t ldb,
813 double beta,
814 double* C, int64_t ldc );
815
816void her2k(
817 blas::Layout layout,
818 blas::Uplo uplo,
819 blas::Op trans,
820 int64_t n, int64_t k,
821 std::complex<float> alpha, // note: complex
822 std::complex<float> const* A, int64_t lda,
823 std::complex<float> const* B, int64_t ldb,
824 float beta, // note: real
825 std::complex<float>* C, int64_t ldc );
826
827void her2k(
828 blas::Layout layout,
829 blas::Uplo uplo,
830 blas::Op trans,
831 int64_t n, int64_t k,
832 std::complex<double> alpha, // note: complex
833 std::complex<double> const* A, int64_t lda,
834 std::complex<double> const* B, int64_t ldb,
835 double beta, // note: real
836 std::complex<double>* C, int64_t ldc );
837
838//------------------------------------------------------------------------------
// herk: Level 3 wrapper, declared for all four element types.
// Takes layout, uplo, and trans selectors, dimensions n and k, read-only A
// (with lda), and C (with ldc) through a mutable pointer.
// Both alpha and beta are real scalars in every overload, including the
// complex ones (float for complex<float>, double for complex<double>).
// NOTE(review): presumably the Hermitian rank-k update of the uplo triangle
// of C, as in reference BLAS xHERK -- confirm.
839void herk(
840 blas::Layout layout,
841 blas::Uplo uplo,
842 blas::Op trans,
843 int64_t n, int64_t k,
844 float alpha,
845 float const* A, int64_t lda,
846 float beta,
847 float* C, int64_t ldc );
848
849void herk(
850 blas::Layout layout,
851 blas::Uplo uplo,
852 blas::Op trans,
853 int64_t n, int64_t k,
854 double alpha,
855 double const* A, int64_t lda,
856 double beta,
857 double* C, int64_t ldc );
858
859void herk(
860 blas::Layout layout,
861 blas::Uplo uplo,
862 blas::Op trans,
863 int64_t n, int64_t k,
864 float alpha, // note: real
865 std::complex<float> const* A, int64_t lda,
866 float beta, // note: real
867 std::complex<float>* C, int64_t ldc );
868
869void herk(
870 blas::Layout layout,
871 blas::Uplo uplo,
872 blas::Op trans,
873 int64_t n, int64_t k,
874 double alpha, // note: real
875 std::complex<double> const* A, int64_t lda,
876 double beta, // note: real
877 std::complex<double>* C, int64_t ldc );
878
879//------------------------------------------------------------------------------
// symm: Level 3 wrapper with the same parameter list as hemm, one overload
// per element type.
// A and B (with lda, ldb) are read-only; C (with ldc) is passed through a
// mutable pointer. alpha and beta have the same type as the elements.
// NOTE(review): presumably C = alpha*A*B + beta*C or alpha*B*A + beta*C
// depending on side, with A symmetric, as in reference BLAS xSYMM -- confirm.
880void symm(
881 blas::Layout layout,
882 blas::Side side,
883 blas::Uplo uplo,
884 int64_t m, int64_t n,
885 float alpha,
886 float const* A, int64_t lda,
887 float const* B, int64_t ldb,
888 float beta,
889 float* C, int64_t ldc );
890
891void symm(
892 blas::Layout layout,
893 blas::Side side,
894 blas::Uplo uplo,
895 int64_t m, int64_t n,
896 double alpha,
897 double const* A, int64_t lda,
898 double const* B, int64_t ldb,
899 double beta,
900 double* C, int64_t ldc );
901
902void symm(
903 blas::Layout layout,
904 blas::Side side,
905 blas::Uplo uplo,
906 int64_t m, int64_t n,
907 std::complex<float> alpha,
908 std::complex<float> const* A, int64_t lda,
909 std::complex<float> const* B, int64_t ldb,
910 std::complex<float> beta,
911 std::complex<float>* C, int64_t ldc );
912
913void symm(
914 blas::Layout layout,
915 blas::Side side,
916 blas::Uplo uplo,
917 int64_t m, int64_t n,
918 std::complex<double> alpha,
919 std::complex<double> const* A, int64_t lda,
920 std::complex<double> const* B, int64_t ldb,
921 std::complex<double> beta,
922 std::complex<double>* C, int64_t ldc );
923
924//------------------------------------------------------------------------------
// syr2k: Level 3 wrapper, one overload per element type.
// Takes layout, uplo, and trans selectors, dimensions n and k, read-only
// A and B (with lda, ldb), and C (with ldc) through a mutable pointer.
// Unlike her2k, alpha and beta both have the element type in every
// overload (both complex for complex data).
// NOTE(review): presumably the symmetric rank-2k update of the uplo
// triangle of C, as in reference BLAS xSYR2K -- confirm.
925void syr2k(
926 blas::Layout layout,
927 blas::Uplo uplo,
928 blas::Op trans,
929 int64_t n, int64_t k,
930 float alpha,
931 float const* A, int64_t lda,
932 float const* B, int64_t ldb,
933 float beta,
934 float* C, int64_t ldc );
935
936void syr2k(
937 blas::Layout layout,
938 blas::Uplo uplo,
939 blas::Op trans,
940 int64_t n, int64_t k,
941 double alpha,
942 double const* A, int64_t lda,
943 double const* B, int64_t ldb,
944 double beta,
945 double* C, int64_t ldc );
946
947void syr2k(
948 blas::Layout layout,
949 blas::Uplo uplo,
950 blas::Op trans,
951 int64_t n, int64_t k,
952 std::complex<float> alpha,
953 std::complex<float> const* A, int64_t lda,
954 std::complex<float> const* B, int64_t ldb,
955 std::complex<float> beta,
956 std::complex<float>* C, int64_t ldc );
957
958void syr2k(
959 blas::Layout layout,
960 blas::Uplo uplo,
961 blas::Op trans,
962 int64_t n, int64_t k,
963 std::complex<double> alpha,
964 std::complex<double> const* A, int64_t lda,
965 std::complex<double> const* B, int64_t ldb,
966 std::complex<double> beta,
967 std::complex<double>* C, int64_t ldc );
968
969//------------------------------------------------------------------------------
// syrk: Level 3 wrapper, one overload per element type.
// Takes layout, uplo, and trans selectors, dimensions n and k, read-only A
// (with lda), and C (with ldc) through a mutable pointer.
// Unlike herk, alpha and beta have the element type in every overload
// (both complex for complex data).
// NOTE(review): presumably the symmetric rank-k update of the uplo triangle
// of C, as in reference BLAS xSYRK -- confirm.
970void syrk(
971 blas::Layout layout,
972 blas::Uplo uplo,
973 blas::Op trans,
974 int64_t n, int64_t k,
975 float alpha,
976 float const* A, int64_t lda,
977 float beta,
978 float* C, int64_t ldc );
979
980void syrk(
981 blas::Layout layout,
982 blas::Uplo uplo,
983 blas::Op trans,
984 int64_t n, int64_t k,
985 double alpha,
986 double const* A, int64_t lda,
987 double beta,
988 double* C, int64_t ldc );
989
990void syrk(
991 blas::Layout layout,
992 blas::Uplo uplo,
993 blas::Op trans,
994 int64_t n, int64_t k,
995 std::complex<float> alpha,
996 std::complex<float> const* A, int64_t lda,
997 std::complex<float> beta,
998 std::complex<float>* C, int64_t ldc );
999
1000void syrk(
1001 blas::Layout layout,
1002 blas::Uplo uplo,
1003 blas::Op trans,
1004 int64_t n, int64_t k,
1005 std::complex<double> alpha,
1006 std::complex<double> const* A, int64_t lda,
1007 std::complex<double> beta,
1008 std::complex<double>* C, int64_t ldc );
1009
1010//------------------------------------------------------------------------------
// trmm: Level 3 wrapper, one overload per element type.
// Takes layout, side, uplo, trans, and diag selectors, dimensions m and n,
// a scalar alpha (same type as the elements), read-only A (with lda), and
// B (with ldb) through a mutable pointer. There is no beta and no separate
// output matrix.
// NOTE(review): presumably B = alpha*op(A)*B or alpha*B*op(A) in place with
// A triangular, as in reference BLAS xTRMM -- confirm.
1011void trmm(
1012 blas::Layout layout,
1013 blas::Side side,
1014 blas::Uplo uplo,
1015 blas::Op trans,
1016 blas::Diag diag,
1017 int64_t m,
1018 int64_t n,
1019 float alpha,
1020 float const* A, int64_t lda,
1021 float* B, int64_t ldb );
1022
1023void trmm(
1024 blas::Layout layout,
1025 blas::Side side,
1026 blas::Uplo uplo,
1027 blas::Op trans,
1028 blas::Diag diag,
1029 int64_t m,
1030 int64_t n,
1031 double alpha,
1032 double const* A, int64_t lda,
1033 double* B, int64_t ldb );
1034
1035void trmm(
1036 blas::Layout layout,
1037 blas::Side side,
1038 blas::Uplo uplo,
1039 blas::Op trans,
1040 blas::Diag diag,
1041 int64_t m,
1042 int64_t n,
1043 std::complex<float> alpha,
1044 std::complex<float> const* A, int64_t lda,
1045 std::complex<float>* B, int64_t ldb );
1046
1047void trmm(
1048 blas::Layout layout,
1049 blas::Side side,
1050 blas::Uplo uplo,
1051 blas::Op trans,
1052 blas::Diag diag,
1053 int64_t m,
1054 int64_t n,
1055 std::complex<double> alpha,
1056 std::complex<double> const* A, int64_t lda,
1057 std::complex<double>* B, int64_t ldb );
1058
1059//------------------------------------------------------------------------------
// trsm: Level 3 wrapper with the same parameter list as trmm, one overload
// per element type.
// A (with lda) is read-only; B (with ldb) is passed through a mutable
// pointer and is the only writable argument.
// NOTE(review): presumably solves op(A)*X = alpha*B or X*op(A) = alpha*B
// with A triangular, overwriting B with X, as in reference BLAS xTRSM --
// confirm.
1060void trsm(
1061 blas::Layout layout,
1062 blas::Side side,
1063 blas::Uplo uplo,
1064 blas::Op trans,
1065 blas::Diag diag,
1066 int64_t m,
1067 int64_t n,
1068 float alpha,
1069 float const* A, int64_t lda,
1070 float* B, int64_t ldb );
1071
1072void trsm(
1073 blas::Layout layout,
1074 blas::Side side,
1075 blas::Uplo uplo,
1076 blas::Op trans,
1077 blas::Diag diag,
1078 int64_t m,
1079 int64_t n,
1080 double alpha,
1081 double const* A, int64_t lda,
1082 double* B, int64_t ldb );
1083
1084void trsm(
1085 blas::Layout layout,
1086 blas::Side side,
1087 blas::Uplo uplo,
1088 blas::Op trans,
1089 blas::Diag diag,
1090 int64_t m,
1091 int64_t n,
1092 std::complex<float> alpha,
1093 std::complex<float> const* A, int64_t lda,
1094 std::complex<float>* B, int64_t ldb );
1095
1096void trsm(
1097 blas::Layout layout,
1098 blas::Side side,
1099 blas::Uplo uplo,
1100 blas::Op trans,
1101 blas::Diag diag,
1102 int64_t m,
1103 int64_t n,
1104 std::complex<double> alpha,
1105 std::complex<double> const* A, int64_t lda,
1106 std::complex<double>* B, int64_t ldb );
1107
1108#endif // BLAS_USE_TEMPLATE
1109
1110//==============================================================================
1111// Batch BLAS APIs (host)
1112//==============================================================================
1113namespace batch {
1114
1115//==============================================================================
1116// Level 1 Batch BLAS
1117
1118//==============================================================================
1119// Level 2 Batch BLAS
1120
1121//==============================================================================
1122// Level 3 Batch BLAS
1123
1124//------------------------------------------------------------------------------
1125// batch gemm
// Host batch gemm, one overload per element type.
// layout is a single value for the whole call; every other problem
// parameter (transA, transB, m, n, k, alpha, beta, lda, ldb, ldc) is passed
// as a read-only std::vector. Aarray, Barray, and Carray are vectors of
// non-const element pointers, so read-only access to A and B is not
// expressed in the types. info is the only non-const vector argument.
// NOTE(review): presumably each parameter vector has either one entry
// (shared by all problems) or batch_size entries, and info receives
// per-problem error status -- neither is visible in this header; confirm.
1126void gemm(
1127 blas::Layout layout,
1128 std::vector<blas::Op> const& transA,
1129 std::vector<blas::Op> const& transB,
1130 std::vector<int64_t> const& m,
1131 std::vector<int64_t> const& n,
1132 std::vector<int64_t> const& k,
1133 std::vector<float > const& alpha,
1134 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
1135 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
1136 std::vector<float > const& beta,
1137 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
1138 size_t batch_size,
1139 std::vector<int64_t>& info );
1140
1141void gemm(
1142 blas::Layout layout,
1143 std::vector<blas::Op> const& transA,
1144 std::vector<blas::Op> const& transB,
1145 std::vector<int64_t> const& m,
1146 std::vector<int64_t> const& n,
1147 std::vector<int64_t> const& k,
1148 std::vector<double > const& alpha,
1149 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1150 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
1151 std::vector<double > const& beta,
1152 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
1153 size_t batch_size,
1154 std::vector<int64_t>& info );
1155
1156void gemm(
1157 blas::Layout layout,
1158 std::vector<blas::Op> const& transA,
1159 std::vector<blas::Op> const& transB,
1160 std::vector<int64_t> const& m,
1161 std::vector<int64_t> const& n,
1162 std::vector<int64_t> const& k,
1163 std::vector< std::complex<float> > const& alpha,
1164 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1165 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
1166 std::vector< std::complex<float> > const& beta,
1167 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
1168 size_t batch_size,
1169 std::vector<int64_t>& info );
1170
1171void gemm(
1172 blas::Layout layout,
1173 std::vector<blas::Op> const& transA,
1174 std::vector<blas::Op> const& transB,
1175 std::vector<int64_t> const& m,
1176 std::vector<int64_t> const& n,
1177 std::vector<int64_t> const& k,
1178 std::vector< std::complex<double> > const& alpha,
1179 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1180 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
1181 std::vector< std::complex<double> > const& beta,
1182 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
1183 size_t batch_size,
1184 std::vector<int64_t>& info );
1185
1186//------------------------------------------------------------------------------
1187// batch hemm
// Host batch hemm, declared for all four element types (including float
// and double).
// layout is a single value for the whole call; side, uplo, m, n, alpha,
// beta, lda, ldb, and ldc are passed as read-only std::vectors. Aarray,
// Barray, and Carray are vectors of non-const element pointers, so
// read-only access to A and B is not expressed in the types. info is the
// only non-const vector argument.
// NOTE(review): presumably each parameter vector has either one entry
// (shared by all problems) or batch_size entries, and info receives
// per-problem error status -- neither is visible in this header; confirm.
1188void hemm(
1189 blas::Layout layout,
1190 std::vector<blas::Side> const& side,
1191 std::vector<blas::Uplo> const& uplo,
1192 std::vector<int64_t> const& m,
1193 std::vector<int64_t> const& n,
1194 std::vector<float > const& alpha,
1195 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
1196 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
1197 std::vector<float > const& beta,
1198 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
1199 size_t batch_size,
1200 std::vector<int64_t>& info );
1201
1202void hemm(
1203 blas::Layout layout,
1204 std::vector<blas::Side> const& side,
1205 std::vector<blas::Uplo> const& uplo,
1206 std::vector<int64_t> const& m,
1207 std::vector<int64_t> const& n,
1208 std::vector<double > const& alpha,
1209 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1210 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
1211 std::vector<double > const& beta,
1212 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
1213 size_t batch_size,
1214 std::vector<int64_t>& info );
1215
1216void hemm(
1217 blas::Layout layout,
1218 std::vector<blas::Side> const& side,
1219 std::vector<blas::Uplo> const& uplo,
1220 std::vector<int64_t> const& m,
1221 std::vector<int64_t> const& n,
1222 std::vector< std::complex<float> > const& alpha,
1223 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1224 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
1225 std::vector< std::complex<float> > const& beta,
1226 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
1227 size_t batch_size,
1228 std::vector<int64_t>& info );
1229
1230void hemm(
1231 blas::Layout layout,
1232 std::vector<blas::Side> const& side,
1233 std::vector<blas::Uplo> const& uplo,
1234 std::vector<int64_t> const& m,
1235 std::vector<int64_t> const& n,
1236 std::vector< std::complex<double> > const& alpha,
1237 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1238 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
1239 std::vector< std::complex<double> > const& beta,
1240 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
1241 size_t batch_size,
1242 std::vector<int64_t>& info );
1243
1244//------------------------------------------------------------------------------
1245// batch her2k
// Batched her2k (standard BLAS name for the Hermitian rank-2k update),
// declared in namespace blas::batch. Four overloads follow, one per scalar
// type: float, double, std::complex<float>, std::complex<double>.
// Argument names and order follow the non-batched her2k (layout, uplo, trans,
// n, k, alpha, A, lda, B, ldb, beta, C, ldc); here everything except layout is
// a std::vector taken by const reference, followed by batch_size and info.
// In the two complex overloads alpha is complex but beta is a real type.
// NOTE(review): presumably each vector holds either one shared value or
// batch_size values, and info receives per-problem status -- confirm against
// the implementation.

// float overload.
1246void her2k(
1247 blas::Layout layout,
1248 std::vector<blas::Uplo> const& uplo,
1249 std::vector<blas::Op> const& trans,
1250 std::vector<int64_t> const& n,
1251 std::vector<int64_t> const& k,
1252 std::vector<float > const& alpha,
1253 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
1254 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
1255 std::vector<float > const& beta,
1256 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
1257 size_t batch_size,
1258 std::vector<int64_t>& info );
1259
// double overload.
1260void her2k(
1261 blas::Layout layout,
1262 std::vector<blas::Uplo> const& uplo,
1263 std::vector<blas::Op> const& trans,
1264 std::vector<int64_t> const& n,
1265 std::vector<int64_t> const& k,
1266 std::vector<double > const& alpha,
1267 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1268 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
1269 std::vector<double > const& beta,
1270 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
1271 size_t batch_size,
1272 std::vector<int64_t>& info );
1273
// std::complex<float> overload: complex alpha, real (float) beta.
1274void her2k(
1275 blas::Layout layout,
1276 std::vector<blas::Uplo> const& uplo,
1277 std::vector<blas::Op> const& trans,
1278 std::vector<int64_t> const& n,
1279 std::vector<int64_t> const& k,
1280 std::vector< std::complex<float> > const& alpha,
1281 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1282 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
1283 std::vector< float > const& beta,
1284 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
1285 size_t batch_size,
1286 std::vector<int64_t>& info );
1287
// std::complex<double> overload: complex alpha, real (double) beta.
1288void her2k(
1289 blas::Layout layout,
1290 std::vector<blas::Uplo> const& uplo,
1291 std::vector<blas::Op> const& trans,
1292 std::vector<int64_t> const& n,
1293 std::vector<int64_t> const& k,
1294 std::vector< std::complex<double> > const& alpha,
1295 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1296 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
1297 std::vector< double > const& beta,
1298 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
1299 size_t batch_size,
1300 std::vector<int64_t>& info );
1301
1302//------------------------------------------------------------------------------
1303// batch herk
// Batched herk (standard BLAS name for the Hermitian rank-k update), declared
// in namespace blas::batch. Four overloads follow, one per scalar type:
// float, double, std::complex<float>, std::complex<double>.
// Argument names and order follow the non-batched herk (layout, uplo, trans,
// n, k, alpha, A, lda, beta, C, ldc); here everything except layout is a
// std::vector taken by const reference, followed by batch_size and info.
// In the two complex overloads both alpha and beta are real types.
// NOTE(review): presumably each vector holds either one shared value or
// batch_size values, and info receives per-problem status -- confirm against
// the implementation.

// float overload.
1304void herk(
1305 blas::Layout layout,
1306 std::vector<blas::Uplo> const& uplo,
1307 std::vector<blas::Op> const& trans,
1308 std::vector<int64_t> const& n,
1309 std::vector<int64_t> const& k,
1310 std::vector<float > const& alpha,
1311 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
1312 std::vector<float > const& beta,
1313 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
1314 size_t batch_size,
1315 std::vector<int64_t>& info );
1316
// double overload.
1317void herk(
1318 blas::Layout layout,
1319 std::vector<blas::Uplo> const& uplo,
1320 std::vector<blas::Op> const& trans,
1321 std::vector<int64_t> const& n,
1322 std::vector<int64_t> const& k,
1323 std::vector<double > const& alpha,
1324 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1325 std::vector<double > const& beta,
1326 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
1327 size_t batch_size,
1328 std::vector<int64_t>& info );
1329
// std::complex<float> overload: real (float) alpha and beta.
1330void herk(
1331 blas::Layout layout,
1332 std::vector<blas::Uplo> const& uplo,
1333 std::vector<blas::Op> const& trans,
1334 std::vector<int64_t> const& n,
1335 std::vector<int64_t> const& k,
1336 std::vector< float > const& alpha,
1337 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1338 std::vector< float > const& beta,
1339 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
1340 size_t batch_size,
1341 std::vector<int64_t>& info );
1342
// std::complex<double> overload: real (double) alpha and beta.
1343void herk(
1344 blas::Layout layout,
1345 std::vector<blas::Uplo> const& uplo,
1346 std::vector<blas::Op> const& trans,
1347 std::vector<int64_t> const& n,
1348 std::vector<int64_t> const& k,
1349 std::vector< double > const& alpha,
1350 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1351 std::vector< double > const& beta,
1352 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
1353 size_t batch_size,
1354 std::vector<int64_t>& info );
1355
1356//------------------------------------------------------------------------------
1357// batch symm
// Batched symm (standard BLAS name for the symmetric matrix-matrix multiply),
// declared in namespace blas::batch. Four overloads follow, one per scalar
// type: float, double, std::complex<float>, std::complex<double>.
// Argument names and order follow the non-batched symm (layout, side, uplo,
// m, n, alpha, A, lda, B, ldb, beta, C, ldc); here everything except layout is
// a std::vector taken by const reference, followed by batch_size and info.
// info is the only argument taken by non-const reference.
// NOTE(review): presumably each vector holds either one shared value or
// batch_size values, and info receives per-problem status -- confirm against
// the implementation.

// float overload.
1358void symm(
1359 blas::Layout layout,
1360 std::vector<blas::Side> const& side,
1361 std::vector<blas::Uplo> const& uplo,
1362 std::vector<int64_t> const& m,
1363 std::vector<int64_t> const& n,
1364 std::vector<float > const& alpha,
1365 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
1366 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
1367 std::vector<float > const& beta,
1368 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
1369 size_t batch_size,
1370 std::vector<int64_t>& info );
1371
// double overload.
1372void symm(
1373 blas::Layout layout,
1374 std::vector<blas::Side> const& side,
1375 std::vector<blas::Uplo> const& uplo,
1376 std::vector<int64_t> const& m,
1377 std::vector<int64_t> const& n,
1378 std::vector<double > const& alpha,
1379 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1380 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
1381 std::vector<double > const& beta,
1382 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
1383 size_t batch_size,
1384 std::vector<int64_t>& info );
1385
// std::complex<float> overload.
1386void symm(
1387 blas::Layout layout,
1388 std::vector<blas::Side> const& side,
1389 std::vector<blas::Uplo> const& uplo,
1390 std::vector<int64_t> const& m,
1391 std::vector<int64_t> const& n,
1392 std::vector< std::complex<float> > const& alpha,
1393 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1394 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
1395 std::vector< std::complex<float> > const& beta,
1396 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
1397 size_t batch_size,
1398 std::vector<int64_t>& info );
1399
// std::complex<double> overload.
1400void symm(
1401 blas::Layout layout,
1402 std::vector<blas::Side> const& side,
1403 std::vector<blas::Uplo> const& uplo,
1404 std::vector<int64_t> const& m,
1405 std::vector<int64_t> const& n,
1406 std::vector< std::complex<double> > const& alpha,
1407 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1408 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
1409 std::vector< std::complex<double> > const& beta,
1410 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
1411 size_t batch_size,
1412 std::vector<int64_t>& info );
1413
1414//------------------------------------------------------------------------------
1415// batch syr2k
// Batched syr2k (standard BLAS name for the symmetric rank-2k update),
// declared in namespace blas::batch. Four overloads follow, one per scalar
// type: float, double, std::complex<float>, std::complex<double>.
// Argument names and order follow the non-batched syr2k (layout, uplo, trans,
// n, k, alpha, A, lda, B, ldb, beta, C, ldc); here everything except layout is
// a std::vector taken by const reference, followed by batch_size and info.
// Unlike her2k, the complex overloads take a complex beta.
// NOTE(review): presumably each vector holds either one shared value or
// batch_size values, and info receives per-problem status -- confirm against
// the implementation.

// float overload.
1416void syr2k(
1417 blas::Layout layout,
1418 std::vector<blas::Uplo> const& uplo,
1419 std::vector<blas::Op> const& trans,
1420 std::vector<int64_t> const& n,
1421 std::vector<int64_t> const& k,
1422 std::vector<float > const& alpha,
1423 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
1424 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
1425 std::vector<float > const& beta,
1426 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
1427 size_t batch_size,
1428 std::vector<int64_t>& info );
1429
// double overload.
1430void syr2k(
1431 blas::Layout layout,
1432 std::vector<blas::Uplo> const& uplo,
1433 std::vector<blas::Op> const& trans,
1434 std::vector<int64_t> const& n,
1435 std::vector<int64_t> const& k,
1436 std::vector<double > const& alpha,
1437 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1438 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
1439 std::vector<double > const& beta,
1440 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
1441 size_t batch_size,
1442 std::vector<int64_t>& info );
1443
// std::complex<float> overload.
1444void syr2k(
1445 blas::Layout layout,
1446 std::vector<blas::Uplo> const& uplo,
1447 std::vector<blas::Op> const& trans,
1448 std::vector<int64_t> const& n,
1449 std::vector<int64_t> const& k,
1450 std::vector< std::complex<float> > const& alpha,
1451 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1452 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
1453 std::vector< std::complex<float> > const& beta,
1454 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
1455 size_t batch_size,
1456 std::vector<int64_t>& info );
1457
// std::complex<double> overload.
1458void syr2k(
1459 blas::Layout layout,
1460 std::vector<blas::Uplo> const& uplo,
1461 std::vector<blas::Op> const& trans,
1462 std::vector<int64_t> const& n,
1463 std::vector<int64_t> const& k,
1464 std::vector< std::complex<double> > const& alpha,
1465 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1466 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
1467 std::vector< std::complex<double> > const& beta,
1468 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
1469 size_t batch_size,
1470 std::vector<int64_t>& info );
1471
1472//------------------------------------------------------------------------------
1473// batch syrk
// Batched syrk (standard BLAS name for the symmetric rank-k update), declared
// in namespace blas::batch. Four overloads follow, one per scalar type:
// float, double, std::complex<float>, std::complex<double>.
// Argument names and order follow the non-batched syrk (layout, uplo, trans,
// n, k, alpha, A, lda, beta, C, ldc); here everything except layout is a
// std::vector taken by const reference, followed by batch_size and info.
// Unlike herk, the complex overloads take complex alpha and beta.
// NOTE(review): presumably each vector holds either one shared value or
// batch_size values, and info receives per-problem status -- confirm against
// the implementation.

// float overload.
1474void syrk(
1475 blas::Layout layout,
1476 std::vector<blas::Uplo> const& uplo,
1477 std::vector<blas::Op> const& trans,
1478 std::vector<int64_t> const& n,
1479 std::vector<int64_t> const& k,
1480 std::vector<float > const& alpha,
1481 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
1482 std::vector<float > const& beta,
1483 std::vector<float*> const& Carray, std::vector<int64_t> const& ldc,
1484 size_t batch_size,
1485 std::vector<int64_t>& info );
1486
// double overload.
1487void syrk(
1488 blas::Layout layout,
1489 std::vector<blas::Uplo> const& uplo,
1490 std::vector<blas::Op> const& trans,
1491 std::vector<int64_t> const& n,
1492 std::vector<int64_t> const& k,
1493 std::vector<double > const& alpha,
1494 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1495 std::vector<double > const& beta,
1496 std::vector<double*> const& Carray, std::vector<int64_t> const& ldc,
1497 size_t batch_size,
1498 std::vector<int64_t>& info );
1499
// std::complex<float> overload.
1500void syrk(
1501 blas::Layout layout,
1502 std::vector<blas::Uplo> const& uplo,
1503 std::vector<blas::Op> const& trans,
1504 std::vector<int64_t> const& n,
1505 std::vector<int64_t> const& k,
1506 std::vector< std::complex<float> > const& alpha,
1507 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1508 std::vector< std::complex<float> > const& beta,
1509 std::vector< std::complex<float>* > const& Carray, std::vector<int64_t> const& ldc,
1510 size_t batch_size,
1511 std::vector<int64_t>& info );
1512
// std::complex<double> overload.
1513void syrk(
1514 blas::Layout layout,
1515 std::vector<blas::Uplo> const& uplo,
1516 std::vector<blas::Op> const& trans,
1517 std::vector<int64_t> const& n,
1518 std::vector<int64_t> const& k,
1519 std::vector< std::complex<double> > const& alpha,
1520 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1521 std::vector< std::complex<double> > const& beta,
1522 std::vector< std::complex<double>* > const& Carray, std::vector<int64_t> const& ldc,
1523 size_t batch_size,
1524 std::vector<int64_t>& info );
1525
1526//------------------------------------------------------------------------------
1527// batch trmm
// Batched trmm (standard BLAS name for the triangular matrix-matrix
// multiply), declared in namespace blas::batch. Four overloads follow, one
// per scalar type: float, double, std::complex<float>, std::complex<double>.
// Argument names and order follow the non-batched trmm (layout, side, uplo,
// trans, diag, m, n, alpha, A, lda, B, ldb); here everything except layout is
// a std::vector taken by const reference, followed by batch_size and info.
// There is no beta or C argument; only A and B arrays are passed.
// NOTE(review): presumably each vector holds either one shared value or
// batch_size values, and info receives per-problem status -- confirm against
// the implementation.

// float overload.
1528void trmm(
1529 blas::Layout layout,
1530 std::vector<blas::Side> const& side,
1531 std::vector<blas::Uplo> const& uplo,
1532 std::vector<blas::Op> const& trans,
1533 std::vector<blas::Diag> const& diag,
1534 std::vector<int64_t> const& m,
1535 std::vector<int64_t> const& n,
1536 std::vector<float > const& alpha,
1537 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
1538 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
1539 size_t batch_size,
1540 std::vector<int64_t>& info );
1541
// double overload.
1542void trmm(
1543 blas::Layout layout,
1544 std::vector<blas::Side> const& side,
1545 std::vector<blas::Uplo> const& uplo,
1546 std::vector<blas::Op> const& trans,
1547 std::vector<blas::Diag> const& diag,
1548 std::vector<int64_t> const& m,
1549 std::vector<int64_t> const& n,
1550 std::vector<double > const& alpha,
1551 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1552 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
1553 size_t batch_size,
1554 std::vector<int64_t>& info );
1555
// std::complex<float> overload.
1556void trmm(
1557 blas::Layout layout,
1558 std::vector<blas::Side> const& side,
1559 std::vector<blas::Uplo> const& uplo,
1560 std::vector<blas::Op> const& trans,
1561 std::vector<blas::Diag> const& diag,
1562 std::vector<int64_t> const& m,
1563 std::vector<int64_t> const& n,
1564 std::vector< std::complex<float> > const& alpha,
1565 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1566 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
1567 size_t batch_size,
1568 std::vector<int64_t>& info );
1569
// std::complex<double> overload.
1570void trmm(
1571 blas::Layout layout,
1572 std::vector<blas::Side> const& side,
1573 std::vector<blas::Uplo> const& uplo,
1574 std::vector<blas::Op> const& trans,
1575 std::vector<blas::Diag> const& diag,
1576 std::vector<int64_t> const& m,
1577 std::vector<int64_t> const& n,
1578 std::vector< std::complex<double> > const& alpha,
1579 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1580 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
1581 size_t batch_size,
1582 std::vector<int64_t>& info );
1583
1584//------------------------------------------------------------------------------
1585// batch trsm
// Batched trsm (standard BLAS name for the triangular solve with multiple
// right-hand sides), declared in namespace blas::batch. Four overloads
// follow, one per scalar type: float, double, std::complex<float>,
// std::complex<double>.
// Argument names and order follow the non-batched trsm (layout, side, uplo,
// trans, diag, m, n, alpha, A, lda, B, ldb); here everything except layout is
// a std::vector taken by const reference, followed by batch_size and info.
// There is no beta or C argument; only A and B arrays are passed.
// NOTE(review): presumably each vector holds either one shared value or
// batch_size values, and info receives per-problem status -- confirm against
// the implementation.

// float overload.
1586void trsm(
1587 blas::Layout layout,
1588 std::vector<blas::Side> const& side,
1589 std::vector<blas::Uplo> const& uplo,
1590 std::vector<blas::Op> const& trans,
1591 std::vector<blas::Diag> const& diag,
1592 std::vector<int64_t> const& m,
1593 std::vector<int64_t> const& n,
1594 std::vector<float > const& alpha,
1595 std::vector<float*> const& Aarray, std::vector<int64_t> const& lda,
1596 std::vector<float*> const& Barray, std::vector<int64_t> const& ldb,
1597 size_t batch_size,
1598 std::vector<int64_t>& info );
1599
// double overload.
1600void trsm(
1601 blas::Layout layout,
1602 std::vector<blas::Side> const& side,
1603 std::vector<blas::Uplo> const& uplo,
1604 std::vector<blas::Op> const& trans,
1605 std::vector<blas::Diag> const& diag,
1606 std::vector<int64_t> const& m,
1607 std::vector<int64_t> const& n,
1608 std::vector<double > const& alpha,
1609 std::vector<double*> const& Aarray, std::vector<int64_t> const& lda,
1610 std::vector<double*> const& Barray, std::vector<int64_t> const& ldb,
1611 size_t batch_size,
1612 std::vector<int64_t>& info );
1613
// std::complex<float> overload.
1614void trsm(
1615 blas::Layout layout,
1616 std::vector<blas::Side> const& side,
1617 std::vector<blas::Uplo> const& uplo,
1618 std::vector<blas::Op> const& trans,
1619 std::vector<blas::Diag> const& diag,
1620 std::vector<int64_t> const& m,
1621 std::vector<int64_t> const& n,
1622 std::vector< std::complex<float> > const& alpha,
1623 std::vector< std::complex<float>* > const& Aarray, std::vector<int64_t> const& lda,
1624 std::vector< std::complex<float>* > const& Barray, std::vector<int64_t> const& ldb,
1625 size_t batch_size,
1626 std::vector<int64_t>& info );
1627
// std::complex<double> overload.
1628void trsm(
1629 blas::Layout layout,
1630 std::vector<blas::Side> const& side,
1631 std::vector<blas::Uplo> const& uplo,
1632 std::vector<blas::Op> const& trans,
1633 std::vector<blas::Diag> const& diag,
1634 std::vector<int64_t> const& m,
1635 std::vector<int64_t> const& n,
1636 std::vector< std::complex<double> > const& alpha,
1637 std::vector< std::complex<double>* > const& Aarray, std::vector<int64_t> const& lda,
1638 std::vector< std::complex<double>* > const& Barray, std::vector<int64_t> const& ldb,
1639 size_t batch_size,
1640 std::vector<int64_t>& info );
1641
1642} // namespace batch
1643} // namespace blas
real_type< T > asum(int64_t n, T const *x, int64_t incx)
Definition asum.hh:35
void axpy(int64_t n, blas::scalar_type< TX, TY > alpha, TX const *x, int64_t incx, TY *y, int64_t incy)
Add scaled vector, $y = \alpha x + y$.
Definition axpy.hh:43
void copy(int64_t n, TX const *x, int64_t incx, TY *y, int64_t incy)
Copy vector, $y = x$.
Definition copy.hh:40
void dot(int64_t n, float const *x, int64_t incx, float const *y, int64_t incy, float *result, blas::Queue &queue)
GPU device, float version.
Definition device_dot.cc:139
void dotu(int64_t n, float const *x, int64_t incx, float const *y, int64_t incy, float *result, blas::Queue &queue)
GPU device, float, unconjugated x^T y version.
Definition device_dot.cc:194
void gemm(blas::Layout layout, std::vector< blas::Op > const &transA, std::vector< blas::Op > const &transB, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_gemm.cc:163
void gemm(blas::Layout layout, blas::Op transA, blas::Op transB, int64_t m, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_gemm.cc:119
void gemv(blas::Layout layout, blas::Op trans, int64_t m, int64_t n, blas::scalar_type< TA, TX, TY > alpha, TA const *A, int64_t lda, TX const *x, int64_t incx, blas::scalar_type< TA, TX, TY > beta, TY *y, int64_t incy)
General matrix-vector multiply: $y = \alpha \, op(A) \, x + \beta y$.
Definition gemv.hh:79
void ger(blas::Layout layout, int64_t m, int64_t n, blas::scalar_type< TA, TX, TY > alpha, TX const *x, int64_t incx, TY const *y, int64_t incy, TA *A, int64_t lda)
General matrix rank-1 update: $A = \alpha x y^H + A$.
Definition ger.hh:60
void geru(blas::Layout layout, int64_t m, int64_t n, blas::scalar_type< TA, TX, TY > alpha, TX const *x, int64_t incx, TY const *y, int64_t incy, TA *A, int64_t lda)
General matrix rank-1 update: $A = \alpha x y^T + A$.
Definition geru.hh:61
void hemm(blas::Layout layout, blas::Side side, blas::Uplo uplo, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_hemm.cc:107
void hemm(blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_hemm.cc:102
void hemv(blas::Layout layout, blas::Uplo uplo, int64_t n, blas::scalar_type< TA, TX, TY > alpha, TA const *A, int64_t lda, TX const *x, int64_t incx, blas::scalar_type< TA, TX, TY > beta, TY *y, int64_t incy)
Hermitian matrix-vector multiply: $y = \alpha A x + \beta y$.
Definition hemv.hh:69
void her2(blas::Layout layout, blas::Uplo uplo, int64_t n, blas::scalar_type< TA, TX, TY > alpha, TX const *x, int64_t incx, TY const *y, int64_t incy, TA *A, int64_t lda)
Hermitian matrix rank-2 update: $A = \alpha x y^H + \bar{\alpha} y x^H + A$.
Definition her2.hh:66
void her2k(blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_her2k.cc:89
void her2k(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_her2k.cc:100
void her(blas::Layout layout, blas::Uplo uplo, int64_t n, blas::real_type< TA, TX > alpha, TX const *x, int64_t incx, TA *A, int64_t lda)
Hermitian matrix rank-1 update: $A = \alpha x x^H + A$.
Definition her.hh:59
void herk(blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_herk.cc:87
void herk(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_herk.cc:92
int64_t iamax(int64_t n, T const *x, int64_t incx)
Definition iamax.hh:34
void nrm2(int64_t n, float const *x, int64_t incx, float *result, blas::Queue &queue)
GPU device, float version.
Definition device_nrm2.cc:84
void rot(int64_t n, TX *x, int64_t incx, TY *y, int64_t incy, blas::real_type< TX, TY > c, blas::scalar_type< TX, TY > s)
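Apply plane rotation: $\begin{bmatrix} x^T \\ y^T \end{bmatrix} = \begin{bmatrix} c & s \\ -\bar{s} & c \end{bmatrix} \begin{bmatrix} x^T \\ y^T \end{bmatrix}$.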
Definition rot.hh:53
void rotg(TA *a, TB *b, blas::real_type< TA, TB > *c, blas::real_type< TA, TB > *s)
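Construct plane rotation that eliminates b, such that: $\begin{bmatrix} c & s \\ -\bar{s} & c \end{bmatrix} \begin{bmatrix} a \\ b \end{bmatrix} = \begin{bmatrix} r \\ 0 \end{bmatrix}$.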
Definition rotg.hh:47
void rotm(int64_t n, TX *x, int64_t incx, TY *y, int64_t incy, blas::scalar_type< TX, TY > const param[5])
Apply modified (fast) plane rotation, H: $\begin{bmatrix} x^T \\ y^T \end{bmatrix} = H \begin{bmatrix} x^T \\ y^T \end{bmatrix}$.
Definition rotm.hh:50
void rotmg(T *d1, T *d2, T *a, T b, T param[5])
Construct modified (fast) plane rotation, H, that eliminates b, such that.
Definition rotmg.hh:99
void scal(int64_t n, float alpha, float *x, int64_t incx, blas::Queue &queue)
GPU device, float version.
Definition device_scal.cc:65
void swap(int64_t n, float *x, int64_t incx, float *y, int64_t incy, blas::Queue &queue)
GPU device, float version.
Definition device_swap.cc:67
void symm(blas::Layout layout, blas::Side side, blas::Uplo uplo, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_symm.cc:106
void symm(blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_symm.cc:87
void symv(blas::Layout layout, blas::Uplo uplo, int64_t n, blas::scalar_type< TA, TX, TY > alpha, TA const *A, int64_t lda, TX const *x, int64_t incx, blas::scalar_type< TA, TX, TY > beta, TY *y, int64_t incy)
Symmetric matrix-vector multiply: $y = \alpha A x + \beta y$.
Definition symv.hh:66
void syr2(blas::Layout layout, blas::Uplo uplo, int64_t n, blas::scalar_type< TA, TX, TY > alpha, TX const *x, int64_t incx, TY const *y, int64_t incy, TA *A, int64_t lda)
Symmetric matrix rank-2 update: $A = \alpha x y^T + \alpha y x^T + A$.
Definition syr2.hh:63
void syr2k(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_syr2k.cc:107
void syr2k(blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_syr2k.cc:87
void syr(blas::Layout layout, blas::Uplo uplo, int64_t n, blas::scalar_type< TA, TX > alpha, TX const *x, int64_t incx, TA *A, int64_t lda)
Symmetric matrix rank-1 update: $A = \alpha x x^T + A$.
Definition syr.hh:56
void syrk(blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_syrk.cc:84
void syrk(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_syrk.cc:101
void trmm(blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float *B, int64_t ldb, blas::Queue &queue)
GPU device, float version.
Definition device_trmm.cc:104
void trmm(blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_trmm.cc:95
void trmv(blas::Layout layout, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t n, TA const *A, int64_t lda, TX *x, int64_t incx)
Triangular matrix-vector multiply: $x = op(A) \, x$.
Definition trmv.hh:69
void trsm(blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_trsm.cc:145
void trsm(blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float *B, int64_t ldb, blas::Queue &queue)
GPU device, float version.
Definition device_trsm.cc:104
void trsv(blas::Layout layout, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t n, TA const *A, int64_t lda, TX *x, int64_t incx)
Solve the triangular matrix-vector equation.
Definition trsv.hh:73