6 #include "blas/device.hh"
20 float const* x, int64_t incx,
21 float* y, int64_t incy,
27 double const* x, int64_t incx,
28 double* y, int64_t incy,
33 std::complex<float> alpha,
34 std::complex<float>
const* x, int64_t incx,
35 std::complex<float>* y, int64_t incy,
40 std::complex<double> alpha,
41 std::complex<double>
const* x, int64_t incx,
42 std::complex<double>* y, int64_t incy,
48 float const* x, int64_t incx,
49 float* y, int64_t incy,
54 double const* x, int64_t incx,
55 double* y, int64_t incy,
60 std::complex<float>
const* x, int64_t incx,
61 std::complex<float>* y, int64_t incy,
66 std::complex<double>
const* x, int64_t incx,
67 std::complex<double>* y, int64_t incy,
73 float const* x, int64_t incx,
74 float const* y, int64_t incy,
80 double const* x, int64_t incx,
81 double const* y, int64_t incy,
87 std::complex<float>
const* x, int64_t incx,
88 std::complex<float>
const* y, int64_t incy,
89 std::complex<float>* result,
94 std::complex<double>
const* x, int64_t incx,
95 std::complex<double>
const* y, int64_t incy,
96 std::complex<double>* result,
102 float const* x, int64_t incx,
103 float const* y, int64_t incy,
109 double const* x, int64_t incx,
110 double const* y, int64_t incy,
116 std::complex<float>
const* x, int64_t incx,
117 std::complex<float>
const* y, int64_t incy,
118 std::complex<float>* result,
123 std::complex<double>
const* x, int64_t incx,
124 std::complex<double>
const* y, int64_t incy,
125 std::complex<double>* result,
131 float const* x, int64_t incx,
137 double const* x, int64_t incx,
143 std::complex<float>
const* x, int64_t incx,
149 std::complex<double>
const* x, int64_t incx,
157 float* x, int64_t incx,
163 double* x, int64_t incx,
168 std::complex<float> alpha,
169 std::complex<float>* x, int64_t incx,
174 std::complex<double> alpha,
175 std::complex<double>* x, int64_t incx,
181 float* x, int64_t incx,
182 float* y, int64_t incy,
187 double* x, int64_t incx,
188 double* y, int64_t incy,
193 std::complex<float>* x, int64_t incx,
194 std::complex<float>* y, int64_t incy,
199 std::complex<double>* x, int64_t incx,
200 std::complex<double>* y, int64_t incy,
214 int64_t m, int64_t n, int64_t k,
216 float const* A, int64_t lda,
217 float const* B, int64_t ldb,
219 float* C, int64_t ldc,
226 int64_t m, int64_t n, int64_t k,
228 double const* A, int64_t lda,
229 double const* B, int64_t ldb,
231 double* C, int64_t ldc,
238 int64_t m, int64_t n, int64_t k,
239 std::complex<float> alpha,
240 std::complex<float>
const* A, int64_t lda,
241 std::complex<float>
const* B, int64_t ldb,
242 std::complex<float> beta,
243 std::complex<float>* C, int64_t ldc,
250 int64_t m, int64_t n, int64_t k,
251 std::complex<double> alpha,
252 std::complex<double>
const* A, int64_t lda,
253 std::complex<double>
const* B, int64_t ldb,
254 std::complex<double> beta,
255 std::complex<double>* C, int64_t ldc,
263 int64_t m, int64_t n,
265 float const* A, int64_t lda,
266 float const* B, int64_t ldb,
268 float* C, int64_t ldc,
275 int64_t m, int64_t n,
277 double const* A, int64_t lda,
278 double const* B, int64_t ldb,
280 double* C, int64_t ldc,
287 int64_t m, int64_t n,
288 std::complex<float> alpha,
289 std::complex<float>
const* A, int64_t lda,
290 std::complex<float>
const* B, int64_t ldb,
291 std::complex<float> beta,
292 std::complex<float>* C, int64_t ldc,
299 int64_t m, int64_t n,
300 std::complex<double> alpha,
301 std::complex<double>
const* A, int64_t lda,
302 std::complex<double>
const* B, int64_t ldb,
303 std::complex<double> beta,
304 std::complex<double>* C, int64_t ldc,
312 int64_t n, int64_t k,
314 float const* A, int64_t lda,
315 float const* B, int64_t ldb,
317 float* C, int64_t ldc,
324 int64_t n, int64_t k,
326 double const* A, int64_t lda,
327 double const* B, int64_t ldb,
329 double* C, int64_t ldc,
336 int64_t n, int64_t k,
337 std::complex<float> alpha,
338 std::complex<float>
const* A, int64_t lda,
339 std::complex<float>
const* B, int64_t ldb,
341 std::complex<float>* C, int64_t ldc,
348 int64_t n, int64_t k,
349 std::complex<double> alpha,
350 std::complex<double>
const* A, int64_t lda,
351 std::complex<double>
const* B, int64_t ldb,
353 std::complex<double>* C, int64_t ldc,
361 int64_t n, int64_t k,
363 float const* A, int64_t lda,
365 float* C, int64_t ldc,
372 int64_t n, int64_t k,
374 double const* A, int64_t lda,
376 double* C, int64_t ldc,
383 int64_t n, int64_t k,
385 std::complex<float>
const* A, int64_t lda,
387 std::complex<float>* C, int64_t ldc,
394 int64_t n, int64_t k,
396 std::complex<double>
const* A, int64_t lda,
398 std::complex<double>* C, int64_t ldc,
406 int64_t m, int64_t n,
408 float const* A, int64_t lda,
409 float const* B, int64_t ldb,
411 float* C, int64_t ldc,
418 int64_t m, int64_t n,
420 double const* A, int64_t lda,
421 double const* B, int64_t ldb,
423 double* C, int64_t ldc,
430 int64_t m, int64_t n,
431 std::complex<float> alpha,
432 std::complex<float>
const* A, int64_t lda,
433 std::complex<float>
const* B, int64_t ldb,
434 std::complex<float> beta,
435 std::complex<float>* C, int64_t ldc,
442 int64_t m, int64_t n,
443 std::complex<double> alpha,
444 std::complex<double>
const* A, int64_t lda,
445 std::complex<double>
const* B, int64_t ldb,
446 std::complex<double> beta,
447 std::complex<double>* C, int64_t ldc,
455 int64_t n, int64_t k,
457 float const* A, int64_t lda,
458 float const* B, int64_t ldb,
460 float* C, int64_t ldc,
467 int64_t n, int64_t k,
469 double const* A, int64_t lda,
470 double const* B, int64_t ldb,
472 double* C, int64_t ldc,
479 int64_t n, int64_t k,
480 std::complex<float> alpha,
481 std::complex<float>
const* A, int64_t lda,
482 std::complex<float>
const* B, int64_t ldb,
483 std::complex<float> beta,
484 std::complex<float>* C, int64_t ldc,
491 int64_t n, int64_t k,
492 std::complex<double> alpha,
493 std::complex<double>
const* A, int64_t lda,
494 std::complex<double>
const* B, int64_t ldb,
495 std::complex<double> beta,
496 std::complex<double>* C, int64_t ldc,
504 int64_t n, int64_t k,
506 float const* A, int64_t lda,
508 float* C, int64_t ldc,
515 int64_t n, int64_t k,
517 double const* A, int64_t lda,
519 double* C, int64_t ldc,
526 int64_t n, int64_t k,
527 std::complex<float> alpha,
528 std::complex<float>
const* A, int64_t lda,
529 std::complex<float> beta,
530 std::complex<float>* C, int64_t ldc,
537 int64_t n, int64_t k,
538 std::complex<double> alpha,
539 std::complex<double>
const* A, int64_t lda,
540 std::complex<double> beta,
541 std::complex<double>* C, int64_t ldc,
554 float const* A, int64_t lda,
555 float* B, int64_t ldb,
567 double const* A, int64_t lda,
568 double* B, int64_t ldb,
579 std::complex<float> alpha,
580 std::complex<float>
const* A, int64_t lda,
581 std::complex<float>* B, int64_t ldb,
592 std::complex<double> alpha,
593 std::complex<double>
const* A, int64_t lda,
594 std::complex<double>* B, int64_t ldb,
607 float const* A, int64_t lda,
608 float* B, int64_t ldb,
620 double const* A, int64_t lda,
621 double* B, int64_t ldb,
632 std::complex<float> alpha,
633 std::complex<float>
const* A, int64_t lda,
634 std::complex<float>* B, int64_t ldb,
645 std::complex<double> alpha,
646 std::complex<double>
const* A, int64_t lda,
647 std::complex<double>* B, int64_t ldb,
668 std::vector<blas::Op>
const& transA,
669 std::vector<blas::Op>
const& transB,
670 std::vector<int64_t>
const& m,
671 std::vector<int64_t>
const& n,
672 std::vector<int64_t>
const& k,
673 std::vector<float >
const& alpha,
674 std::vector<float*>
const& Aarray, std::vector<int64_t>
const& lda,
675 std::vector<float*>
const& Barray, std::vector<int64_t>
const& ldb,
676 std::vector<float >
const& beta,
677 std::vector<float*>
const& Carray, std::vector<int64_t>
const& ldc,
679 std::vector<int64_t>& info,
684 std::vector<blas::Op>
const& transA,
685 std::vector<blas::Op>
const& transB,
686 std::vector<int64_t>
const& m,
687 std::vector<int64_t>
const& n,
688 std::vector<int64_t>
const& k,
689 std::vector<double >
const& alpha,
690 std::vector<double*>
const& Aarray, std::vector<int64_t>
const& lda,
691 std::vector<double*>
const& Barray, std::vector<int64_t>
const& ldb,
692 std::vector<double >
const& beta,
693 std::vector<double*>
const& Carray, std::vector<int64_t>
const& ldc,
695 std::vector<int64_t>& info,
700 std::vector<blas::Op>
const& transA,
701 std::vector<blas::Op>
const& transB,
702 std::vector<int64_t>
const& m,
703 std::vector<int64_t>
const& n,
704 std::vector<int64_t>
const& k,
705 std::vector< std::complex<float> >
const& alpha,
706 std::vector< std::complex<float>* >
const& Aarray, std::vector<int64_t>
const& lda,
707 std::vector< std::complex<float>* >
const& Barray, std::vector<int64_t>
const& ldb,
708 std::vector< std::complex<float> >
const& beta,
709 std::vector< std::complex<float>* >
const& Carray, std::vector<int64_t>
const& ldc,
711 std::vector<int64_t>& info,
716 std::vector<blas::Op>
const& transA,
717 std::vector<blas::Op>
const& transB,
718 std::vector<int64_t>
const& m,
719 std::vector<int64_t>
const& n,
720 std::vector<int64_t>
const& k,
721 std::vector< std::complex<double> >
const& alpha,
722 std::vector< std::complex<double>* >
const& Aarray, std::vector<int64_t>
const& lda,
723 std::vector< std::complex<double>* >
const& Barray, std::vector<int64_t>
const& ldb,
724 std::vector< std::complex<double> >
const& beta,
725 std::vector< std::complex<double>* >
const& Carray, std::vector<int64_t>
const& ldc,
727 std::vector<int64_t>& info,
734 std::vector<blas::Op>
const& transA,
735 std::vector<blas::Op>
const& transB,
736 std::vector<int64_t>
const& m,
737 std::vector<int64_t>
const& n,
738 std::vector<int64_t>
const& k,
739 std::vector<float >
const& alpha,
740 std::vector<float*>
const& Aarray, std::vector<int64_t>
const& lda,
741 std::vector<float*>
const& Barray, std::vector<int64_t>
const& ldb,
742 std::vector<float >
const& beta,
743 std::vector<float*>
const& Carray, std::vector<int64_t>
const& ldc,
744 std::vector<size_t>
const& group_size,
745 std::vector<int64_t>& info,
750 std::vector<blas::Op>
const& transA,
751 std::vector<blas::Op>
const& transB,
752 std::vector<int64_t>
const& m,
753 std::vector<int64_t>
const& n,
754 std::vector<int64_t>
const& k,
755 std::vector<double >
const& alpha,
756 std::vector<double*>
const& Aarray, std::vector<int64_t>
const& lda,
757 std::vector<double*>
const& Barray, std::vector<int64_t>
const& ldb,
758 std::vector<double >
const& beta,
759 std::vector<double*>
const& Carray, std::vector<int64_t>
const& ldc,
760 std::vector<size_t>
const& group_size,
761 std::vector<int64_t>& info,
766 std::vector<blas::Op>
const& transA,
767 std::vector<blas::Op>
const& transB,
768 std::vector<int64_t>
const& m,
769 std::vector<int64_t>
const& n,
770 std::vector<int64_t>
const& k,
771 std::vector< std::complex<float> >
const& alpha,
772 std::vector< std::complex<float>* >
const& Aarray, std::vector<int64_t>
const& lda,
773 std::vector< std::complex<float>* >
const& Barray, std::vector<int64_t>
const& ldb,
774 std::vector< std::complex<float> >
const& beta,
775 std::vector< std::complex<float>* >
const& Carray, std::vector<int64_t>
const& ldc,
776 std::vector<size_t>
const& group_size,
777 std::vector<int64_t>& info,
782 std::vector<blas::Op>
const& transA,
783 std::vector<blas::Op>
const& transB,
784 std::vector<int64_t>
const& m,
785 std::vector<int64_t>
const& n,
786 std::vector<int64_t>
const& k,
787 std::vector< std::complex<double> >
const& alpha,
788 std::vector< std::complex<double>* >
const& Aarray, std::vector<int64_t>
const& lda,
789 std::vector< std::complex<double>* >
const& Barray, std::vector<int64_t>
const& ldb,
790 std::vector< std::complex<double> >
const& beta,
791 std::vector< std::complex<double>* >
const& Carray, std::vector<int64_t>
const& ldc,
792 std::vector<size_t>
const& group_size,
793 std::vector<int64_t>& info,
800 std::vector<blas::Side>
const& side,
801 std::vector<blas::Uplo>
const& uplo,
802 std::vector<int64_t>
const& m,
803 std::vector<int64_t>
const& n,
804 std::vector<float >
const& alpha,
805 std::vector<float*>
const& Aarray, std::vector<int64_t>
const& lda,
806 std::vector<float*>
const& Barray, std::vector<int64_t>
const& ldb,
807 std::vector<float >
const& beta,
808 std::vector<float*>
const& Carray, std::vector<int64_t>
const& ldc,
810 std::vector<int64_t>& info,
815 std::vector<blas::Side>
const& side,
816 std::vector<blas::Uplo>
const& uplo,
817 std::vector<int64_t>
const& m,
818 std::vector<int64_t>
const& n,
819 std::vector<double >
const& alpha,
820 std::vector<double*>
const& Aarray, std::vector<int64_t>
const& lda,
821 std::vector<double*>
const& Barray, std::vector<int64_t>
const& ldb,
822 std::vector<double >
const& beta,
823 std::vector<double*>
const& Carray, std::vector<int64_t>
const& ldc,
825 std::vector<int64_t>& info,
830 std::vector<blas::Side>
const& side,
831 std::vector<blas::Uplo>
const& uplo,
832 std::vector<int64_t>
const& m,
833 std::vector<int64_t>
const& n,
834 std::vector< std::complex<float> >
const& alpha,
835 std::vector< std::complex<float>* >
const& Aarray, std::vector<int64_t>
const& lda,
836 std::vector< std::complex<float>* >
const& Barray, std::vector<int64_t>
const& ldb,
837 std::vector< std::complex<float> >
const& beta,
838 std::vector< std::complex<float>* >
const& Carray, std::vector<int64_t>
const& ldc,
840 std::vector<int64_t>& info,
845 std::vector<blas::Side>
const& side,
846 std::vector<blas::Uplo>
const& uplo,
847 std::vector<int64_t>
const& m,
848 std::vector<int64_t>
const& n,
849 std::vector< std::complex<double> >
const& alpha,
850 std::vector< std::complex<double>* >
const& Aarray, std::vector<int64_t>
const& lda,
851 std::vector< std::complex<double>* >
const& Barray, std::vector<int64_t>
const& ldb,
852 std::vector< std::complex<double> >
const& beta,
853 std::vector< std::complex<double>* >
const& Carray, std::vector<int64_t>
const& ldc,
855 std::vector<int64_t>& info,
862 std::vector<blas::Uplo>
const& uplo,
863 std::vector<blas::Op>
const& trans,
864 std::vector<int64_t>
const& n,
865 std::vector<int64_t>
const& k,
866 std::vector<float >
const& alpha,
867 std::vector<float*>
const& Aarray, std::vector<int64_t>
const& lda,
868 std::vector<float*>
const& Barray, std::vector<int64_t>
const& ldb,
869 std::vector<float >
const& beta,
870 std::vector<float*>
const& Carray, std::vector<int64_t>
const& ldc,
872 std::vector<int64_t>& info,
877 std::vector<blas::Uplo>
const& uplo,
878 std::vector<blas::Op>
const& trans,
879 std::vector<int64_t>
const& n,
880 std::vector<int64_t>
const& k,
881 std::vector<double >
const& alpha,
882 std::vector<double*>
const& Aarray, std::vector<int64_t>
const& lda,
883 std::vector<double*>
const& Barray, std::vector<int64_t>
const& ldb,
884 std::vector<double >
const& beta,
885 std::vector<double*>
const& Carray, std::vector<int64_t>
const& ldc,
887 std::vector<int64_t>& info,
892 std::vector<blas::Uplo>
const& uplo,
893 std::vector<blas::Op>
const& trans,
894 std::vector<int64_t>
const& n,
895 std::vector<int64_t>
const& k,
896 std::vector< std::complex<float> >
const& alpha,
897 std::vector< std::complex<float>* >
const& Aarray, std::vector<int64_t>
const& lda,
898 std::vector< std::complex<float>* >
const& Barray, std::vector<int64_t>
const& ldb,
899 std::vector< float >
const& beta,
900 std::vector< std::complex<float>* >
const& Carray, std::vector<int64_t>
const& ldc,
902 std::vector<int64_t>& info,
907 std::vector<blas::Uplo>
const& uplo,
908 std::vector<blas::Op>
const& trans,
909 std::vector<int64_t>
const& n,
910 std::vector<int64_t>
const& k,
911 std::vector< std::complex<double> >
const& alpha,
912 std::vector< std::complex<double>* >
const& Aarray, std::vector<int64_t>
const& lda,
913 std::vector< std::complex<double>* >
const& Barray, std::vector<int64_t>
const& ldb,
914 std::vector< double >
const& beta,
915 std::vector< std::complex<double>* >
const& Carray, std::vector<int64_t>
const& ldc,
917 std::vector<int64_t>& info,
924 std::vector<blas::Uplo>
const& uplo,
925 std::vector<blas::Op>
const& trans,
926 std::vector<int64_t>
const& n,
927 std::vector<int64_t>
const& k,
928 std::vector<float >
const& alpha,
929 std::vector<float*>
const& Aarray, std::vector<int64_t>
const& lda,
930 std::vector<float >
const& beta,
931 std::vector<float*>
const& Carray, std::vector<int64_t>
const& ldc,
933 std::vector<int64_t>& info,
938 std::vector<blas::Uplo>
const& uplo,
939 std::vector<blas::Op>
const& trans,
940 std::vector<int64_t>
const& n,
941 std::vector<int64_t>
const& k,
942 std::vector<double >
const& alpha,
943 std::vector<double*>
const& Aarray, std::vector<int64_t>
const& lda,
944 std::vector<double >
const& beta,
945 std::vector<double*>
const& Carray, std::vector<int64_t>
const& ldc,
947 std::vector<int64_t>& info,
952 std::vector<blas::Uplo>
const& uplo,
953 std::vector<blas::Op>
const& trans,
954 std::vector<int64_t>
const& n,
955 std::vector<int64_t>
const& k,
956 std::vector< float >
const& alpha,
957 std::vector< std::complex<float>* >
const& Aarray, std::vector<int64_t>
const& lda,
958 std::vector< float >
const& beta,
959 std::vector< std::complex<float>* >
const& Carray, std::vector<int64_t>
const& ldc,
961 std::vector<int64_t>& info,
966 std::vector<blas::Uplo>
const& uplo,
967 std::vector<blas::Op>
const& trans,
968 std::vector<int64_t>
const& n,
969 std::vector<int64_t>
const& k,
970 std::vector< double >
const& alpha,
971 std::vector< std::complex<double>* >
const& Aarray, std::vector<int64_t>
const& lda,
972 std::vector< double >
const& beta,
973 std::vector< std::complex<double>* >
const& Carray, std::vector<int64_t>
const& ldc,
975 std::vector<int64_t>& info,
982 std::vector<blas::Side>
const& side,
983 std::vector<blas::Uplo>
const& uplo,
984 std::vector<int64_t>
const& m,
985 std::vector<int64_t>
const& n,
986 std::vector<float >
const& alpha,
987 std::vector<float*>
const& Aarray, std::vector<int64_t>
const& lda,
988 std::vector<float*>
const& Barray, std::vector<int64_t>
const& ldb,
989 std::vector<float >
const& beta,
990 std::vector<float*>
const& Carray, std::vector<int64_t>
const& ldc,
992 std::vector<int64_t>& info,
997 std::vector<blas::Side>
const& side,
998 std::vector<blas::Uplo>
const& uplo,
999 std::vector<int64_t>
const& m,
1000 std::vector<int64_t>
const& n,
1001 std::vector<double >
const& alpha,
1002 std::vector<double*>
const& Aarray, std::vector<int64_t>
const& lda,
1003 std::vector<double*>
const& Barray, std::vector<int64_t>
const& ldb,
1004 std::vector<double >
const& beta,
1005 std::vector<double*>
const& Carray, std::vector<int64_t>
const& ldc,
1007 std::vector<int64_t>& info,
1011 blas::Layout layout,
1012 std::vector<blas::Side>
const& side,
1013 std::vector<blas::Uplo>
const& uplo,
1014 std::vector<int64_t>
const& m,
1015 std::vector<int64_t>
const& n,
1016 std::vector< std::complex<float> >
const& alpha,
1017 std::vector< std::complex<float>* >
const& Aarray, std::vector<int64_t>
const& lda,
1018 std::vector< std::complex<float>* >
const& Barray, std::vector<int64_t>
const& ldb,
1019 std::vector< std::complex<float> >
const& beta,
1020 std::vector< std::complex<float>* >
const& Carray, std::vector<int64_t>
const& ldc,
1022 std::vector<int64_t>& info,
1026 blas::Layout layout,
1027 std::vector<blas::Side>
const& side,
1028 std::vector<blas::Uplo>
const& uplo,
1029 std::vector<int64_t>
const& m,
1030 std::vector<int64_t>
const& n,
1031 std::vector< std::complex<double> >
const& alpha,
1032 std::vector< std::complex<double>* >
const& Aarray, std::vector<int64_t>
const& lda,
1033 std::vector< std::complex<double>* >
const& Barray, std::vector<int64_t>
const& ldb,
1034 std::vector< std::complex<double> >
const& beta,
1035 std::vector< std::complex<double>* >
const& Carray, std::vector<int64_t>
const& ldc,
1037 std::vector<int64_t>& info,
1043 blas::Layout layout,
1044 std::vector<blas::Uplo>
const& uplo,
1045 std::vector<blas::Op>
const& trans,
1046 std::vector<int64_t>
const& n,
1047 std::vector<int64_t>
const& k,
1048 std::vector<float >
const& alpha,
1049 std::vector<float*>
const& Aarray, std::vector<int64_t>
const& lda,
1050 std::vector<float*>
const& Barray, std::vector<int64_t>
const& ldb,
1051 std::vector<float >
const& beta,
1052 std::vector<float*>
const& Carray, std::vector<int64_t>
const& ldc,
1054 std::vector<int64_t>& info,
1058 blas::Layout layout,
1059 std::vector<blas::Uplo>
const& uplo,
1060 std::vector<blas::Op>
const& trans,
1061 std::vector<int64_t>
const& n,
1062 std::vector<int64_t>
const& k,
1063 std::vector<double >
const& alpha,
1064 std::vector<double*>
const& Aarray, std::vector<int64_t>
const& lda,
1065 std::vector<double*>
const& Barray, std::vector<int64_t>
const& ldb,
1066 std::vector<double >
const& beta,
1067 std::vector<double*>
const& Carray, std::vector<int64_t>
const& ldc,
1069 std::vector<int64_t>& info,
1073 blas::Layout layout,
1074 std::vector<blas::Uplo>
const& uplo,
1075 std::vector<blas::Op>
const& trans,
1076 std::vector<int64_t>
const& n,
1077 std::vector<int64_t>
const& k,
1078 std::vector< std::complex<float> >
const& alpha,
1079 std::vector< std::complex<float>* >
const& Aarray, std::vector<int64_t>
const& lda,
1080 std::vector< std::complex<float>* >
const& Barray, std::vector<int64_t>
const& ldb,
1081 std::vector< std::complex<float> >
const& beta,
1082 std::vector< std::complex<float>* >
const& Carray, std::vector<int64_t>
const& ldc,
1084 std::vector<int64_t>& info,
1088 blas::Layout layout,
1089 std::vector<blas::Uplo>
const& uplo,
1090 std::vector<blas::Op>
const& trans,
1091 std::vector<int64_t>
const& n,
1092 std::vector<int64_t>
const& k,
1093 std::vector< std::complex<double> >
const& alpha,
1094 std::vector< std::complex<double>* >
const& Aarray, std::vector<int64_t>
const& lda,
1095 std::vector< std::complex<double>* >
const& Barray, std::vector<int64_t>
const& ldb,
1096 std::vector< std::complex<double> >
const& beta,
1097 std::vector< std::complex<double>* >
const& Carray, std::vector<int64_t>
const& ldc,
1099 std::vector<int64_t>& info,
1105 blas::Layout layout,
1106 std::vector<blas::Uplo>
const& uplo,
1107 std::vector<blas::Op>
const& trans,
1108 std::vector<int64_t>
const& n,
1109 std::vector<int64_t>
const& k,
1110 std::vector<float >
const& alpha,
1111 std::vector<float*>
const& Aarray, std::vector<int64_t>
const& lda,
1112 std::vector<float >
const& beta,
1113 std::vector<float*>
const& Carray, std::vector<int64_t>
const& ldc,
1115 std::vector<int64_t>& info,
1119 blas::Layout layout,
1120 std::vector<blas::Uplo>
const& uplo,
1121 std::vector<blas::Op>
const& trans,
1122 std::vector<int64_t>
const& n,
1123 std::vector<int64_t>
const& k,
1124 std::vector<double >
const& alpha,
1125 std::vector<double*>
const& Aarray, std::vector<int64_t>
const& lda,
1126 std::vector<double >
const& beta,
1127 std::vector<double*>
const& Carray, std::vector<int64_t>
const& ldc,
1129 std::vector<int64_t>& info,
1133 blas::Layout layout,
1134 std::vector<blas::Uplo>
const& uplo,
1135 std::vector<blas::Op>
const& trans,
1136 std::vector<int64_t>
const& n,
1137 std::vector<int64_t>
const& k,
1138 std::vector< std::complex<float> >
const& alpha,
1139 std::vector< std::complex<float>* >
const& Aarray, std::vector<int64_t>
const& lda,
1140 std::vector< std::complex<float> >
const& beta,
1141 std::vector< std::complex<float>* >
const& Carray, std::vector<int64_t>
const& ldc,
1143 std::vector<int64_t>& info,
1147 blas::Layout layout,
1148 std::vector<blas::Uplo>
const& uplo,
1149 std::vector<blas::Op>
const& trans,
1150 std::vector<int64_t>
const& n,
1151 std::vector<int64_t>
const& k,
1152 std::vector< std::complex<double> >
const& alpha,
1153 std::vector< std::complex<double>* >
const& Aarray, std::vector<int64_t>
const& lda,
1154 std::vector< std::complex<double> >
const& beta,
1155 std::vector< std::complex<double>* >
const& Carray, std::vector<int64_t>
const& ldc,
1157 std::vector<int64_t>& info,
1163 blas::Layout layout,
1164 std::vector<blas::Side>
const& side,
1165 std::vector<blas::Uplo>
const& uplo,
1166 std::vector<blas::Op>
const& trans,
1167 std::vector<blas::Diag>
const& diag,
1168 std::vector<int64_t>
const& m,
1169 std::vector<int64_t>
const& n,
1170 std::vector<float >
const& alpha,
1171 std::vector<float*>
const& Aarray, std::vector<int64_t>
const& lda,
1172 std::vector<float*>
const& Barray, std::vector<int64_t>
const& ldb,
1174 std::vector<int64_t>& info,
1178 blas::Layout layout,
1179 std::vector<blas::Side>
const& side,
1180 std::vector<blas::Uplo>
const& uplo,
1181 std::vector<blas::Op>
const& trans,
1182 std::vector<blas::Diag>
const& diag,
1183 std::vector<int64_t>
const& m,
1184 std::vector<int64_t>
const& n,
1185 std::vector<double >
const& alpha,
1186 std::vector<double*>
const& Aarray, std::vector<int64_t>
const& lda,
1187 std::vector<double*>
const& Barray, std::vector<int64_t>
const& ldb,
1189 std::vector<int64_t>& info,
1193 blas::Layout layout,
1194 std::vector<blas::Side>
const& side,
1195 std::vector<blas::Uplo>
const& uplo,
1196 std::vector<blas::Op>
const& trans,
1197 std::vector<blas::Diag>
const& diag,
1198 std::vector<int64_t>
const& m,
1199 std::vector<int64_t>
const& n,
1200 std::vector< std::complex<float> >
const& alpha,
1201 std::vector< std::complex<float>* >
const& Aarray, std::vector<int64_t>
const& lda,
1202 std::vector< std::complex<float>* >
const& Barray, std::vector<int64_t>
const& ldb,
1204 std::vector<int64_t>& info,
1208 blas::Layout layout,
1209 std::vector<blas::Side>
const& side,
1210 std::vector<blas::Uplo>
const& uplo,
1211 std::vector<blas::Op>
const& trans,
1212 std::vector<blas::Diag>
const& diag,
1213 std::vector<int64_t>
const& m,
1214 std::vector<int64_t>
const& n,
1215 std::vector< std::complex<double> >
const& alpha,
1216 std::vector< std::complex<double>* >
const& Aarray, std::vector<int64_t>
const& lda,
1217 std::vector< std::complex<double>* >
const& Barray, std::vector<int64_t>
const& ldb,
1219 std::vector<int64_t>& info,
1225 blas::Layout layout,
1226 std::vector<blas::Side>
const& side,
1227 std::vector<blas::Uplo>
const& uplo,
1228 std::vector<blas::Op>
const& trans,
1229 std::vector<blas::Diag>
const& diag,
1230 std::vector<int64_t>
const& m,
1231 std::vector<int64_t>
const& n,
1232 std::vector<float >
const& alpha,
1233 std::vector<float*>
const& Aarray, std::vector<int64_t>
const& lda,
1234 std::vector<float*>
const& Barray, std::vector<int64_t>
const& ldb,
1236 std::vector<int64_t>& info,
1240 blas::Layout layout,
1241 std::vector<blas::Side>
const& side,
1242 std::vector<blas::Uplo>
const& uplo,
1243 std::vector<blas::Op>
const& trans,
1244 std::vector<blas::Diag>
const& diag,
1245 std::vector<int64_t>
const& m,
1246 std::vector<int64_t>
const& n,
1247 std::vector<double >
const& alpha,
1248 std::vector<double*>
const& Aarray, std::vector<int64_t>
const& lda,
1249 std::vector<double*>
const& Barray, std::vector<int64_t>
const& ldb,
1251 std::vector<int64_t>& info,
1255 blas::Layout layout,
1256 std::vector<blas::Side>
const& side,
1257 std::vector<blas::Uplo>
const& uplo,
1258 std::vector<blas::Op>
const& trans,
1259 std::vector<blas::Diag>
const& diag,
1260 std::vector<int64_t>
const& m,
1261 std::vector<int64_t>
const& n,
1262 std::vector< std::complex<float> >
const& alpha,
1263 std::vector< std::complex<float>* >
const& Aarray, std::vector<int64_t>
const& lda,
1264 std::vector< std::complex<float>* >
const& Barray, std::vector<int64_t>
const& ldb,
1266 std::vector<int64_t>& info,
1270 blas::Layout layout,
1271 std::vector<blas::Side>
const& side,
1272 std::vector<blas::Uplo>
const& uplo,
1273 std::vector<blas::Op>
const& trans,
1274 std::vector<blas::Diag>
const& diag,
1275 std::vector<int64_t>
const& m,
1276 std::vector<int64_t>
const& n,
1277 std::vector< std::complex<double> >
const& alpha,
1278 std::vector< std::complex<double>* >
const& Aarray, std::vector<int64_t>
const& lda,
1279 std::vector< std::complex<double>* >
const& Barray, std::vector<int64_t>
const& ldb,
1281 std::vector<int64_t>& info,
Queue for executing GPU device routines.
Definition device.hh:143
void axpy(int64_t n, blas::scalar_type< TX, TY > alpha, TX const *x, int64_t incx, TY *y, int64_t incy)
Add scaled vector, $y = \alpha x + y$.
Definition axpy.hh:43
void copy(int64_t n, TX const *x, int64_t incx, TY *y, int64_t incy)
Copy vector, $y = x$.
Definition copy.hh:40
void dot(int64_t n, float const *x, int64_t incx, float const *y, int64_t incy, float *result, blas::Queue &queue)
GPU device, float version.
Definition device_dot.cc:139
void dotu(int64_t n, float const *x, int64_t incx, float const *y, int64_t incy, float *result, blas::Queue &queue)
GPU device, float, unconjugated x^T y version.
Definition device_dot.cc:194
void gemm(blas::Layout layout, std::vector< blas::Op > const &transA, std::vector< blas::Op > const &transB, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_gemm.cc:163
void gemm(blas::Layout layout, blas::Op transA, blas::Op transB, int64_t m, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_gemm.cc:119
void hemm(blas::Layout layout, blas::Side side, blas::Uplo uplo, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_hemm.cc:107
void hemm(blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_hemm.cc:102
void her2k(blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_her2k.cc:89
void her2k(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_her2k.cc:100
void herk(blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_herk.cc:87
void herk(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_herk.cc:92
void nrm2(int64_t n, float const *x, int64_t incx, float *result, blas::Queue &queue)
GPU device, float version.
Definition device_nrm2.cc:84
void scal(int64_t n, float alpha, float *x, int64_t incx, blas::Queue &queue)
GPU device, float version.
Definition device_scal.cc:65
void swap(int64_t n, float *x, int64_t incx, float *y, int64_t incy, blas::Queue &queue)
GPU device, float version.
Definition device_swap.cc:67
void symm(blas::Layout layout, blas::Side side, blas::Uplo uplo, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_symm.cc:106
void symm(blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_symm.cc:87
void syr2k(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_syr2k.cc:107
void syr2k(blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_syr2k.cc:87
void syrk(blas::Layout layout, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< int64_t > const &n, std::vector< int64_t > const &k, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float > const &beta, std::vector< float * > const &Carray, std::vector< int64_t > const &ldc, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_syrk.cc:84
void syrk(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_syrk.cc:101
void trmm(blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float *B, int64_t ldb, blas::Queue &queue)
GPU device, float version.
Definition device_trmm.cc:104
void trmm(blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_trmm.cc:95
void trsm(blas::Layout layout, std::vector< blas::Side > const &side, std::vector< blas::Uplo > const &uplo, std::vector< blas::Op > const &trans, std::vector< blas::Diag > const &diag, std::vector< int64_t > const &m, std::vector< int64_t > const &n, std::vector< float > const &alpha, std::vector< float * > const &Aarray, std::vector< int64_t > const &lda, std::vector< float * > const &Barray, std::vector< int64_t > const &ldb, size_t batch_size, std::vector< int64_t > &info, blas::Queue &queue)
GPU device, variable-size batched, float version.
Definition device_batch_trsm.cc:145
void trsm(blas::Layout layout, blas::Side side, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t m, int64_t n, float alpha, float const *A, int64_t lda, float *B, int64_t ldb, blas::Queue &queue)
GPU device, float version.
Definition device_trsm.cc:104