12 #ifndef ICL_CORE_BLAS_S_H
13 #define ICL_CORE_BLAS_S_H
15 #include "plasma_async.h"
16 #include "plasma_barrier.h"
17 #include "plasma_descriptor.h"
18 #include "plasma_types.h"
19 #include "plasma_workspace.h"
20 #include "plasma_descriptor.h"
// Declares fabsf taking one float (named alpha here) and returning float.
// NOTE(review): fabsf is also the C standard library function from <math.h>;
// this redeclaration has a compatible type but looks like a generated-code
// artifact — confirm it is intentional.
30 float fabsf(
float alpha);
// Prototype: core_sgeadd returns int and takes a transpose selector, scalars
// alpha/beta, a read-only matrix A (lda) and a non-const matrix B (ldb).
// NOTE(review): no row/column count arguments are visible although leading
// dimensions are; this listing may have dropped a line — confirm upstream.
33 int core_sgeadd(plasma_enum_t transa,
35 float alpha,
const float *A,
int lda,
36 float beta,
float *B,
int ldb);
38 int core_sgelqt(
int m,
int n,
int ib,
// Prototype: core_sgemm takes two transpose selectors, scalars alpha/beta,
// read-only A (lda) and B (ldb), and a non-const C (ldc); returns nothing.
// Presumably the tile counterpart of BLAS SGEMM — confirm.
// NOTE(review): no dimension arguments are visible; a line may be missing
// from this listing — confirm upstream.
44 void core_sgemm(plasma_enum_t transa, plasma_enum_t transb,
46 float alpha,
const float *A,
int lda,
47 const float *B,
int ldb,
48 float beta,
float *C,
int ldc);
50 int core_sgeqrt(
int m,
int n,
int ib,
// Prototype: core_sgessq takes dimensions m and n, a read-only matrix A with
// leading dimension lda, and non-const scale/sumsq pointers; returns nothing.
// Presumably maintains a scaled sum of squares in the style of LAPACK
// xLASSQ — confirm against the implementation.
56 void core_sgessq(
int m,
int n,
57 const float *A,
int lda,
58 float *scale,
float *sumsq);
// Prototype: core_sgetrf takes a matrix descriptor by value, an int pivot
// array, a blocking parameter ib, a rank/size pair, three volatile pointers
// (max_idx, max_val, info) and a plasma_barrier_t pointer; returns nothing.
// NOTE(review): rank/size plus the barrier and volatile arguments suggest
// several cooperating threads call this together — confirm.
60 void core_sgetrf(
plasma_desc_t A,
int *ipiv,
int ib,
int rank,
int size,
61 volatile int *max_idx,
volatile float *max_val,
62 volatile int *info, plasma_barrier_t *barrier);
64 int core_ssygst(
int itype, plasma_enum_t uplo,
// Prototype: core_ssymm takes side and uplo selectors, scalars alpha/beta,
// read-only A (lda) and B (ldb), and a non-const C (ldc); returns nothing.
// NOTE(review): no dimension arguments are visible (a line may be missing),
// and the same prototype is declared a second time later in this header.
69 void core_ssymm(plasma_enum_t side, plasma_enum_t uplo,
71 float alpha,
const float *A,
int lda,
72 const float *B,
int ldb,
73 float beta,
float *C,
int ldc);
// Prototype: core_ssyr2k takes uplo and trans selectors, scalars alpha/beta,
// read-only A (lda) and B (ldb), and a non-const C (ldc); returns nothing.
// Presumably the tile counterpart of BLAS SSYR2K — confirm.
// NOTE(review): no dimension arguments are visible; a line may be missing.
75 void core_ssyr2k(plasma_enum_t uplo, plasma_enum_t trans,
77 float alpha,
const float *A,
int lda,
78 const float *B,
int ldb,
79 float beta,
float *C,
int ldc);
// Prototype: core_ssyrk takes uplo and trans selectors, scalars alpha/beta,
// a read-only A (lda) and a non-const C (ldc); returns nothing.
// NOTE(review): no dimension arguments are visible (a line may be missing),
// and the same prototype is declared a second time later in this header.
81 void core_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
83 float alpha,
const float *A,
int lda,
84 float beta,
float *C,
int ldc);
// Prototype: core_ssyssq takes an uplo selector, a read-only matrix A (lda)
// and non-const scale/sumsq pointers; returns nothing.
// NOTE(review): no order argument is visible (a line may be missing), and an
// identical declaration follows immediately after this one.
86 void core_ssyssq(plasma_enum_t uplo,
88 const float *A,
int lda,
89 float *scale,
float *sumsq);
// Second declaration of core_ssyssq with the same parameter list as the one
// directly above; redundant but type-compatible.
// NOTE(review): presumably produced by generating this header from a
// template with distinct symmetric/Hermitian entries — confirm.
91 void core_ssyssq(plasma_enum_t uplo,
93 const float *A,
int lda,
94 float *scale,
float *sumsq);
96 void core_slacpy(plasma_enum_t uplo, plasma_enum_t transa,
98 const float *A,
int lda,
101 void core_slacpy_lapack2tile_band(plasma_enum_t uplo,
103 int m,
int n,
int nb,
int kl,
int ku,
104 const float *A,
int lda,
107 void core_slacpy_tile2lapack_band(plasma_enum_t uplo,
109 int m,
int n,
int nb,
int kl,
int ku,
110 const float *B,
int ldb,
// Prototype: core_slange takes a norm selector, a read-only matrix A (lda),
// a non-const work buffer and a non-const result pointer; returns nothing.
// NOTE(review): no dimension arguments are visible; a line may be missing
// from this listing — confirm upstream.
113 void core_slange(plasma_enum_t norm,
115 const float *A,
int lda,
116 float *work,
float *result);
// Prototype: core_slansy takes norm and uplo selectors, a read-only matrix A
// (lda), a non-const work buffer and a non-const value pointer.
// NOTE(review): no order argument is visible (a line may be missing), and an
// identical declaration follows immediately after this one.
118 void core_slansy(plasma_enum_t norm, plasma_enum_t uplo,
120 const float *A,
int lda,
121 float *work,
float *value);
// Second declaration of core_slansy with the same parameter list as the one
// directly above; redundant but type-compatible.
// NOTE(review): presumably produced by generating this header from a
// template with distinct symmetric/Hermitian entries — confirm.
123 void core_slansy(plasma_enum_t norm, plasma_enum_t uplo,
125 const float *A,
int lda,
126 float *work,
float *value);
// Prototype: core_slantr takes norm, uplo and diag selectors, a read-only
// matrix A (lda), a non-const work buffer and a non-const value pointer.
// NOTE(review): no dimension arguments are visible; a line may be missing
// from this listing — confirm upstream.
128 void core_slantr(plasma_enum_t norm, plasma_enum_t uplo, plasma_enum_t diag,
130 const float *A,
int lda,
131 float *work,
float *value);
133 void core_slascl(plasma_enum_t uplo,
134 float cfrom,
float cto,
138 void core_slaset(plasma_enum_t uplo,
140 float alpha,
float beta,
143 void core_sgeswp(plasma_enum_t colrow,
146 void core_ssyswp(
int uplo,
plasma_desc_t A,
int k1,
int k2,
const int *ipiv,
149 int core_slauum(plasma_enum_t uplo,
153 int core_spamm(
int op, plasma_enum_t side, plasma_enum_t storev,
154 int m,
int n,
int k,
int l,
155 const float *A1,
int lda1,
157 const float *V,
int ldv,
// Prototype: core_sparfb returns int and takes side/trans/direct/storev
// selectors, sizes m1, n1, m2, n2, k, l, read-only V (ldv) and T (ldt), and
// a non-const work buffer with leading dimension ldwork.
// NOTE(review): two size pairs are declared but no matching matrix pointers
// are visible; lines may be missing from this listing — confirm upstream.
160 int core_sparfb(plasma_enum_t side, plasma_enum_t trans, plasma_enum_t direct,
161 plasma_enum_t storev,
162 int m1,
int n1,
int m2,
int n2,
int k,
int l,
165 const float *V,
int ldv,
166 const float *T,
int ldt,
167 float *work,
int ldwork);
169 int core_spemv(plasma_enum_t trans,
int storev,
172 const float *A,
int lda,
173 const float *X,
int incx,
178 int core_spotrf(plasma_enum_t uplo,
// Second declaration of core_ssymm; the visible parameter list matches the
// earlier declaration in this header, so it is redundant but compatible.
// NOTE(review): no dimension arguments are visible here either; a line may
// be missing from this listing — confirm upstream.
182 void core_ssymm(plasma_enum_t side, plasma_enum_t uplo,
184 float alpha,
const float *A,
int lda,
185 const float *B,
int ldb,
186 float beta,
float *C,
int ldc);
189 plasma_enum_t uplo, plasma_enum_t trans,
191 float alpha,
const float *A,
int lda,
192 const float *B,
int ldb,
193 float beta,
float *C,
int ldc);
// Second declaration of core_ssyrk; the visible parameter list matches the
// earlier declaration in this header, so it is redundant but compatible.
// NOTE(review): no dimension arguments are visible here either; a line may
// be missing from this listing — confirm upstream.
195 void core_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
197 float alpha,
const float *A,
int lda,
198 float beta,
float *C,
int ldc);
// Prototype: core_stradd returns int and takes uplo and transa selectors,
// scalars alpha/beta, a read-only A (lda) and a non-const B (ldb).
// NOTE(review): no dimension arguments are visible; a line may be missing
// from this listing — confirm upstream.
200 int core_stradd(plasma_enum_t uplo, plasma_enum_t transa,
202 float alpha,
const float *A,
int lda,
203 float beta,
float *B,
int ldb);
205 void core_strmm(plasma_enum_t side, plasma_enum_t uplo,
206 plasma_enum_t transa, plasma_enum_t diag,
208 float alpha,
const float *A,
int lda,
211 void core_strsm(plasma_enum_t side, plasma_enum_t uplo,
212 plasma_enum_t transa, plasma_enum_t diag,
214 float alpha,
const float *A,
int lda,
// Prototype: core_strssq takes uplo and diag selectors, a read-only matrix A
// (lda) and non-const scale/sumsq pointers; returns nothing.
// NOTE(review): no dimension arguments are visible; a line may be missing
// from this listing — confirm upstream.
217 void core_strssq(plasma_enum_t uplo, plasma_enum_t diag,
219 const float *A,
int lda,
220 float *scale,
float *sumsq);
222 int core_strtri(plasma_enum_t uplo, plasma_enum_t diag,
226 int core_stslqt(
int m,
int n,
int ib,
// Prototype: core_stsmlq returns int and takes side and trans selectors,
// sizes m1, n1, m2, n2, k, ib, read-only V (ldv) and T (ldt), and a
// non-const work buffer with leading dimension ldwork.
// NOTE(review): two size pairs are declared but no matching matrix pointers
// are visible; lines may be missing from this listing — confirm upstream.
233 int core_stsmlq(plasma_enum_t side, plasma_enum_t trans,
234 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
237 const float *V,
int ldv,
238 const float *T,
int ldt,
239 float *work,
int ldwork);
// Prototype: core_stsmqr returns int and takes side and trans selectors,
// sizes m1, n1, m2, n2, k, ib, read-only V (ldv) and T (ldt), and a
// non-const work buffer with leading dimension ldwork.
// NOTE(review): two size pairs are declared but no matching matrix pointers
// are visible; lines may be missing from this listing — confirm upstream.
241 int core_stsmqr(plasma_enum_t side, plasma_enum_t trans,
242 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
245 const float *V,
int ldv,
246 const float *T,
int ldt,
247 float *work,
int ldwork);
249 int core_stsqrt(
int m,
int n,
int ib,
256 int core_sttlqt(
int m,
int n,
int ib,
// Prototype: core_sttmlq returns int and takes side and trans selectors,
// sizes m1, n1, m2, n2, k, ib, read-only V (ldv) and T (ldt), and a
// non-const work buffer with leading dimension ldwork.
// NOTE(review): two size pairs are declared but no matching matrix pointers
// are visible; lines may be missing from this listing — confirm upstream.
263 int core_sttmlq(plasma_enum_t side, plasma_enum_t trans,
264 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
267 const float *V,
int ldv,
268 const float *T,
int ldt,
269 float *work,
int ldwork);
// Prototype: core_sttmqr returns int and takes side and trans selectors,
// sizes m1, n1, m2, n2, k, ib, read-only V (ldv) and T (ldt), and a
// non-const work buffer with leading dimension ldwork.
// NOTE(review): two size pairs are declared but no matching matrix pointers
// are visible; lines may be missing from this listing — confirm upstream.
271 int core_sttmqr(plasma_enum_t side, plasma_enum_t trans,
272 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
275 const float *V,
int ldv,
276 const float *T,
int ldt,
277 float *work,
int ldwork);
279 int core_sttqrt(
int m,
int n,
int ib,
// Prototype: core_sormlq returns int and takes side and trans selectors,
// sizes m, n, k, ib, read-only A (lda) and T (ldt), and a non-const work
// buffer with leading dimension ldwork.
// NOTE(review): every visible matrix argument is const except work, so the
// matrix being updated may be on a line missing from this listing — confirm.
286 int core_sormlq(plasma_enum_t side, plasma_enum_t trans,
287 int m,
int n,
int k,
int ib,
288 const float *A,
int lda,
289 const float *T,
int ldt,
291 float *work,
int ldwork);
// Prototype: core_sormqr returns int and takes side and trans selectors,
// sizes m, n, k, ib, read-only A (lda) and T (ldt), and a non-const work
// buffer with leading dimension ldwork.
// NOTE(review): every visible matrix argument is const except work, so the
// matrix being updated may be on a line missing from this listing — confirm.
293 int core_sormqr(plasma_enum_t side, plasma_enum_t trans,
294 int m,
int n,
int k,
int ib,
295 const float *A,
int lda,
296 const float *T,
int ldt,
298 float *work,
int ldwork);
// Prototype: core_omp_samax takes an int colrow selector, sizes m and n, a
// read-only matrix A (lda), and sequence/request pointers; returns nothing.
// NOTE(review): no output argument is visible; a line may be missing from
// this listing — confirm upstream.
301 void core_omp_samax(
int colrow,
int m,
int n,
302 const float *A,
int lda,
304 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_sgeadd takes a transpose selector, sizes m and n,
// scalars alpha/beta, a read-only A (lda), a non-const B (ldb), and
// sequence/request pointers; returns nothing.
// Presumably the task-based counterpart of core_sgeadd — confirm.
306 void core_omp_sgeadd(
307 plasma_enum_t transa,
int m,
int n,
308 float alpha,
const float *A,
int lda,
309 float beta,
float *B,
int ldb,
310 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_sgelqt takes sizes m, n, ib, a plasma_workspace_t
// passed by value, and sequence/request pointers; returns nothing.
// NOTE(review): no matrix arguments are visible; lines may be missing from
// this listing — confirm upstream.
312 void core_omp_sgelqt(
int m,
int n,
int ib,
315 plasma_workspace_t work,
316 plasma_sequence_t *sequence, plasma_request_t *request);
319 plasma_enum_t transa, plasma_enum_t transb,
321 float alpha,
const float *A,
int lda,
322 const float *B,
int ldb,
323 float beta,
float *C,
int ldc,
324 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_sgeqrt takes sizes m, n, ib, a plasma_workspace_t
// passed by value, and sequence/request pointers; returns nothing.
// NOTE(review): no matrix arguments are visible; lines may be missing from
// this listing — confirm upstream.
326 void core_omp_sgeqrt(
int m,
int n,
int ib,
329 plasma_workspace_t work,
330 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_sgessq takes sizes m and n, a read-only matrix A
// (lda), non-const scale/sumsq pointers, and sequence/request pointers.
// Presumably the task-based counterpart of core_sgessq — confirm.
332 void core_omp_sgessq(
int m,
int n,
333 const float *A,
int lda,
334 float *scale,
float *sumsq,
335 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_sgessq_aux takes a count n, read-only scale and sumsq
// arrays, and sequence/request pointers; returns nothing.
// NOTE(review): both data arguments are const and no output is visible; a
// line may be missing from this listing — confirm upstream.
337 void core_omp_sgessq_aux(
int n,
338 const float *scale,
const float *sumsq,
340 plasma_sequence_t *sequence,
341 plasma_request_t *request);
// Prototype: core_omp_ssygst takes an int itype, an uplo selector, and
// sequence/request pointers; returns nothing.
// NOTE(review): no size or matrix arguments are visible; lines may be
// missing from this listing — confirm upstream.
343 void core_omp_ssygst(
int itype, plasma_enum_t uplo,
347 plasma_sequence_t *sequence, plasma_request_t *request);
350 plasma_enum_t side, plasma_enum_t uplo,
352 float alpha,
const float *A,
int lda,
353 const float *B,
int ldb,
354 float beta,
float *C,
int ldc,
355 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_ssyr2k takes uplo and trans selectors, scalars
// alpha/beta, read-only A (lda) and B (ldb), a non-const C (ldc), and
// sequence/request pointers; returns nothing.
// NOTE(review): no dimension arguments are visible (a line may be missing),
// and the same prototype is declared a second time later in this header.
357 void core_omp_ssyr2k(
358 plasma_enum_t uplo, plasma_enum_t trans,
360 float alpha,
const float *A,
int lda,
361 const float *B,
int ldb,
362 float beta,
float *C,
int ldc,
363 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_ssyrk takes uplo and trans selectors, scalars
// alpha/beta, a read-only A (lda), a non-const C (ldc), and
// sequence/request pointers; returns nothing.
// NOTE(review): no dimension arguments are visible; a line may be missing
// from this listing — confirm upstream.
365 void core_omp_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
367 float alpha,
const float *A,
int lda,
368 float beta,
float *C,
int ldc,
369 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_ssyssq takes an uplo selector, a read-only matrix A
// (lda), non-const scale/sumsq pointers, and sequence/request pointers.
// NOTE(review): no order argument is visible (a line may be missing), and an
// identical declaration follows immediately after this one.
371 void core_omp_ssyssq(plasma_enum_t uplo,
373 const float *A,
int lda,
374 float *scale,
float *sumsq,
375 plasma_sequence_t *sequence, plasma_request_t *request);
// Second declaration of core_omp_ssyssq with the same parameter list as the
// one directly above; redundant but type-compatible.
// NOTE(review): presumably produced by generating this header from a
// template with distinct symmetric/Hermitian entries — confirm.
377 void core_omp_ssyssq(plasma_enum_t uplo,
379 const float *A,
int lda,
380 float *scale,
float *sumsq,
381 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_ssyssq_aux takes sizes m and n, read-only scale and
// sumsq arrays, and sequence/request pointers; returns nothing.
// NOTE(review): both data arguments are const and no output is visible; a
// line may be missing from this listing — confirm upstream.
383 void core_omp_ssyssq_aux(
int m,
int n,
384 const float *scale,
const float *sumsq,
386 plasma_sequence_t *sequence,
387 plasma_request_t *request);
// Prototype: core_omp_slacpy takes uplo and transa selectors, a read-only
// matrix A (lda), and sequence/request pointers; returns nothing.
// NOTE(review): neither dimensions nor a destination matrix are visible;
// lines may be missing from this listing — confirm upstream.
389 void core_omp_slacpy(plasma_enum_t uplo, plasma_enum_t transa,
391 const float *A,
int lda,
393 plasma_sequence_t *sequence, plasma_request_t *request);
395 void core_omp_slacpy_lapack2tile_band(plasma_enum_t uplo,
397 int m,
int n,
int nb,
int kl,
int ku,
398 const float *A,
int lda,
401 void core_omp_slacpy_tile2lapack_band(plasma_enum_t uplo,
403 int m,
int n,
int nb,
int kl,
int ku,
404 const float *B,
int ldb,
// Prototype: core_omp_slange takes a norm selector, a read-only matrix A
// (lda), non-const work and result pointers, and sequence/request pointers.
// NOTE(review): no dimension arguments are visible; a line may be missing
// from this listing — confirm upstream.
407 void core_omp_slange(plasma_enum_t norm,
409 const float *A,
int lda,
410 float *work,
float *result,
411 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_slange_aux takes a norm selector, a read-only matrix A
// (lda), and sequence/request pointers; returns nothing.
// NOTE(review): neither dimensions nor an output argument are visible; lines
// may be missing from this listing — confirm upstream.
413 void core_omp_slange_aux(plasma_enum_t norm,
415 const float *A,
int lda,
417 plasma_sequence_t *sequence,
418 plasma_request_t *request);
// Prototype: core_omp_slansy takes norm and uplo selectors, a read-only
// matrix A (lda), non-const work and value pointers, and sequence/request
// pointers; returns nothing.
// NOTE(review): no order argument is visible (a line may be missing), and
// the same prototype is declared a second time later in this header.
420 void core_omp_slansy(plasma_enum_t norm, plasma_enum_t uplo,
422 const float *A,
int lda,
423 float *work,
float *value,
424 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_slansy_aux takes norm and uplo selectors, a read-only
// matrix A (lda), and sequence/request pointers; returns nothing.
// NOTE(review): neither an order nor an output argument is visible (lines
// may be missing), and the same prototype is declared again further down.
426 void core_omp_slansy_aux(plasma_enum_t norm, plasma_enum_t uplo,
428 const float *A,
int lda,
430 plasma_sequence_t *sequence,
431 plasma_request_t *request);
// Second declaration of core_omp_slansy; the visible parameter list matches
// the earlier declaration in this header, so it is redundant but compatible.
// NOTE(review): presumably produced by generating this header from a
// template with distinct symmetric/Hermitian entries — confirm.
433 void core_omp_slansy(plasma_enum_t norm, plasma_enum_t uplo,
435 const float *A,
int lda,
436 float *work,
float *value,
437 plasma_sequence_t *sequence, plasma_request_t *request);
// Second declaration of core_omp_slansy_aux; the visible parameter list
// matches the earlier declaration in this header, so it is redundant but
// compatible.
// NOTE(review): presumably produced by generating this header from a
// template with distinct symmetric/Hermitian entries — confirm.
439 void core_omp_slansy_aux(plasma_enum_t norm, plasma_enum_t uplo,
441 const float *A,
int lda,
443 plasma_sequence_t *sequence,
444 plasma_request_t *request);
// Prototype: core_omp_slantr takes norm, uplo and diag selectors, a
// read-only matrix A (lda), non-const work and value pointers, and
// sequence/request pointers; returns nothing.
// NOTE(review): no dimension arguments are visible; a line may be missing
// from this listing — confirm upstream.
446 void core_omp_slantr(plasma_enum_t norm, plasma_enum_t uplo, plasma_enum_t diag,
448 const float *A,
int lda,
449 float *work,
float *value,
450 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_slantr_aux takes norm and uplo selectors, a read-only
// matrix A (lda), and sequence/request pointers; returns nothing.
// NOTE(review): unlike core_omp_slantr no diag selector is visible, nor are
// dimensions or an output argument; lines may be missing — confirm upstream.
452 void core_omp_slantr_aux(plasma_enum_t norm, plasma_enum_t uplo,
455 const float *A,
int lda,
457 plasma_sequence_t *sequence,
458 plasma_request_t *request);
// Prototype: core_omp_slascl takes an uplo selector, two float scalars cfrom
// and cto, and sequence/request pointers; returns nothing.
// NOTE(review): no size or matrix arguments are visible; lines may be
// missing from this listing — confirm upstream.
460 void core_omp_slascl(plasma_enum_t uplo,
461 float cfrom,
float cto,
464 plasma_sequence_t *sequence, plasma_request_t *request);
466 void core_omp_slaset(plasma_enum_t uplo,
470 float alpha,
float beta,
// Prototype: core_omp_slauum takes an uplo selector and sequence/request
// pointers; returns nothing.
// NOTE(review): no size or matrix arguments are visible; lines may be
// missing from this listing — confirm upstream.
473 void core_omp_slauum(plasma_enum_t uplo,
476 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_spotrf takes an uplo selector and sequence/request
// pointers; returns nothing.
// NOTE(review): no size or matrix arguments are visible; lines may be
// missing from this listing — confirm upstream.
478 void core_omp_spotrf(plasma_enum_t uplo,
482 plasma_sequence_t *sequence, plasma_request_t *request);
485 plasma_enum_t side, plasma_enum_t uplo,
487 float alpha,
const float *A,
int lda,
488 const float *B,
int ldb,
489 float beta,
float *C,
int ldc,
490 plasma_sequence_t *sequence, plasma_request_t *request);
// Second declaration of core_omp_ssyr2k; the visible parameter list matches
// the earlier declaration in this header, so it is redundant but compatible.
// NOTE(review): no dimension arguments are visible here either; a line may
// be missing from this listing — confirm upstream.
492 void core_omp_ssyr2k(
493 plasma_enum_t uplo, plasma_enum_t trans,
495 float alpha,
const float *A,
int lda,
496 const float *B,
int ldb,
497 float beta,
float *C,
int ldc,
498 plasma_sequence_t *sequence, plasma_request_t *request);
501 plasma_enum_t uplo, plasma_enum_t trans,
503 float alpha,
const float *A,
int lda,
504 float beta,
float *C,
int ldc,
505 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_stradd takes uplo and transa selectors, scalars
// alpha/beta, a read-only A (lda), a non-const B (ldb), and
// sequence/request pointers; returns nothing.
// NOTE(review): no dimension arguments are visible; a line may be missing
// from this listing — confirm upstream.
507 void core_omp_stradd(
508 plasma_enum_t uplo, plasma_enum_t transa,
510 float alpha,
const float *A,
int lda,
511 float beta,
float *B,
int ldb,
512 plasma_sequence_t *sequence, plasma_request_t *request);
515 plasma_enum_t side, plasma_enum_t uplo,
516 plasma_enum_t transa, plasma_enum_t diag,
518 float alpha,
const float *A,
int lda,
520 plasma_sequence_t *sequence, plasma_request_t *request);
523 plasma_enum_t side, plasma_enum_t uplo,
524 plasma_enum_t transa, plasma_enum_t diag,
526 float alpha,
const float *A,
int lda,
528 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_strssq takes uplo and diag selectors, a read-only
// matrix A (lda), non-const scale/sumsq pointers, and sequence/request
// pointers; returns nothing.
// NOTE(review): no dimension arguments are visible; a line may be missing
// from this listing — confirm upstream.
530 void core_omp_strssq(plasma_enum_t uplo, plasma_enum_t diag,
532 const float *A,
int lda,
533 float *scale,
float *sumsq,
534 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_strtri takes uplo and diag selectors and
// sequence/request pointers; returns nothing.
// NOTE(review): no size or matrix arguments are visible; lines may be
// missing from this listing — confirm upstream.
536 void core_omp_strtri(plasma_enum_t uplo, plasma_enum_t diag,
540 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_stslqt takes sizes m, n, ib, a plasma_workspace_t
// passed by value, and sequence/request pointers; returns nothing.
// NOTE(review): no matrix arguments are visible; lines may be missing from
// this listing — confirm upstream.
542 void core_omp_stslqt(
int m,
int n,
int ib,
546 plasma_workspace_t work,
547 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_stsmlq takes side and trans selectors, sizes m1, n1,
// m2, n2, k, ib, read-only V (ldv) and T (ldt), a plasma_workspace_t passed
// by value, and sequence/request pointers; returns nothing.
// NOTE(review): two size pairs are declared but no matching matrix pointers
// are visible; lines may be missing from this listing — confirm upstream.
549 void core_omp_stsmlq(plasma_enum_t side, plasma_enum_t trans,
550 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
553 const float *V,
int ldv,
554 const float *T,
int ldt,
555 plasma_workspace_t work,
556 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_stsmqr takes side and trans selectors, sizes m1, n1,
// m2, n2, k, ib, read-only V (ldv) and T (ldt), a plasma_workspace_t passed
// by value, and sequence/request pointers; returns nothing.
// NOTE(review): two size pairs are declared but no matching matrix pointers
// are visible; lines may be missing from this listing — confirm upstream.
558 void core_omp_stsmqr(plasma_enum_t side, plasma_enum_t trans,
559 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
562 const float *V,
int ldv,
563 const float *T,
int ldt,
564 plasma_workspace_t work,
565 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_stsqrt takes sizes m, n, ib, a plasma_workspace_t
// passed by value, and sequence/request pointers; returns nothing.
// NOTE(review): no matrix arguments are visible; lines may be missing from
// this listing — confirm upstream.
567 void core_omp_stsqrt(
int m,
int n,
int ib,
571 plasma_workspace_t work,
572 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_sttlqt takes sizes m, n, ib, a plasma_workspace_t
// passed by value, and sequence/request pointers; returns nothing.
// NOTE(review): no matrix arguments are visible; lines may be missing from
// this listing — confirm upstream.
574 void core_omp_sttlqt(
int m,
int n,
int ib,
578 plasma_workspace_t work,
579 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_sttmlq takes side and trans selectors, sizes m1, n1,
// m2, n2, k, ib, read-only V (ldv) and T (ldt), a plasma_workspace_t passed
// by value, and sequence/request pointers; returns nothing.
// NOTE(review): two size pairs are declared but no matching matrix pointers
// are visible; lines may be missing from this listing — confirm upstream.
581 void core_omp_sttmlq(plasma_enum_t side, plasma_enum_t trans,
582 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
585 const float *V,
int ldv,
586 const float *T,
int ldt,
587 plasma_workspace_t work,
588 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_sttmqr takes side and trans selectors, sizes m1, n1,
// m2, n2, k, ib, read-only V (ldv) and T (ldt), a plasma_workspace_t passed
// by value, and sequence/request pointers; returns nothing.
// NOTE(review): two size pairs are declared but no matching matrix pointers
// are visible; lines may be missing from this listing — confirm upstream.
590 void core_omp_sttmqr(plasma_enum_t side, plasma_enum_t trans,
591 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
594 const float *V,
int ldv,
595 const float *T,
int ldt,
596 plasma_workspace_t work,
597 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_sttqrt takes sizes m, n, ib, a plasma_workspace_t
// passed by value, and sequence/request pointers; returns nothing.
// NOTE(review): no matrix arguments are visible; lines may be missing from
// this listing — confirm upstream.
599 void core_omp_sttqrt(
int m,
int n,
int ib,
603 plasma_workspace_t work,
604 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_sormlq takes side and trans selectors, sizes m, n, k,
// ib, read-only A (lda) and T (ldt), a plasma_workspace_t passed by value,
// and sequence/request pointers; returns nothing.
// NOTE(review): every visible matrix argument is const, so the matrix being
// updated may be on a line missing from this listing — confirm upstream.
606 void core_omp_sormlq(plasma_enum_t side, plasma_enum_t trans,
607 int m,
int n,
int k,
int ib,
608 const float *A,
int lda,
609 const float *T,
int ldt,
611 plasma_workspace_t work,
612 plasma_sequence_t *sequence, plasma_request_t *request);
// Prototype: core_omp_sormqr takes side and trans selectors, sizes m, n, k,
// ib, read-only A (lda) and T (ldt), a plasma_workspace_t passed by value,
// and sequence/request pointers; returns nothing.
// NOTE(review): every visible matrix argument is const, so the matrix being
// updated may be on a line missing from this listing — confirm upstream.
614 void core_omp_sormqr(plasma_enum_t side, plasma_enum_t trans,
615 int m,
int n,
int k,
int ib,
616 const float *A,
int lda,
617 const float *T,
int ldt,
619 plasma_workspace_t work,
620 plasma_sequence_t *sequence, plasma_request_t *request);
628 #endif // ICL_CORE_BLAS_S_H
// Definition: plasma_descriptor.h:40