/*
 * PLASMA
 * Parallel Linear Algebra Software for Multicore Architectures
 * core_blas_s.h
 */
12 #ifndef ICL_CORE_BLAS_S_H
13 #define ICL_CORE_BLAS_S_H
14 
15 #include "plasma_async.h"
16 #include "plasma_barrier.h"
17 #include "plasma_descriptor.h"
18 #include "plasma_types.h"
19 #include "plasma_workspace.h"
20 #include "plasma_descriptor.h"
21 
22 #ifdef __cplusplus
23 extern "C" {
24 #endif
25 
26 #define REAL
27 
28 /******************************************************************************/
29 #ifdef COMPLEX
30 float fabsf(float alpha);
31 #endif
32 
33 int core_sgeadd(plasma_enum_t transa,
34  int m, int n,
35  float alpha, const float *A, int lda,
36  float beta, float *B, int ldb);
37 
/**
 * core_sgelqt -- the name suggests a tile LQ factorization kernel; confirm
 * against the definition (not in this header).
 * A, T, tau and work are all non-const, so the callee may write through any
 * of them. Returns an int whose meaning is not specified in this header.
 */
int core_sgelqt(int m, int n, int ib,
                float *A, int lda,
                float *T, int ldt,
                float *tau,
                float *work);
43 
44 void core_sgemm(plasma_enum_t transa, plasma_enum_t transb,
45  int m, int n, int k,
46  float alpha, const float *A, int lda,
47  const float *B, int ldb,
48  float beta, float *C, int ldc);
49 
/**
 * core_sgeqrt -- the name suggests a tile QR factorization kernel; confirm
 * against the definition (not in this header).
 * Same parameter list as core_sgelqt: A, T, tau and work are all non-const.
 * Returns an int whose meaning is not specified in this header.
 */
int core_sgeqrt(int m, int n, int ib,
                float *A, int lda,
                float *T, int ldt,
                float *tau,
                float *work);
55 
/**
 * core_sgessq -- the name suggests a scaled sum-of-squares accumulation over
 * a general m-by-n matrix; confirm against the definition.
 * A is const-qualified; results are delivered through the writable scale and
 * sumsq pointers (the function itself returns void).
 */
void core_sgessq(int m, int n,
                 const float *A, int lda,
                 float *scale, float *sumsq);
59 
60 void core_sgetrf(plasma_desc_t A, int *ipiv, int ib, int rank, int size,
61  volatile int *max_idx, volatile float *max_val,
62  volatile int *info, plasma_barrier_t *barrier);
63 
64 int core_ssygst(int itype, plasma_enum_t uplo,
65  int n,
66  float *A, int lda,
67  float *B, int ldb);
68 
69 void core_ssymm(plasma_enum_t side, plasma_enum_t uplo,
70  int m, int n,
71  float alpha, const float *A, int lda,
72  const float *B, int ldb,
73  float beta, float *C, int ldc);
74 
75 void core_ssyr2k(plasma_enum_t uplo, plasma_enum_t trans,
76  int n, int k,
77  float alpha, const float *A, int lda,
78  const float *B, int ldb,
79  float beta, float *C, int ldc);
80 
81 void core_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
82  int n, int k,
83  float alpha, const float *A, int lda,
84  float beta, float *C, int ldc);
85 
86 void core_ssyssq(plasma_enum_t uplo,
87  int n,
88  const float *A, int lda,
89  float *scale, float *sumsq);
90 
91 void core_ssyssq(plasma_enum_t uplo,
92  int n,
93  const float *A, int lda,
94  float *scale, float *sumsq);
95 
96 void core_slacpy(plasma_enum_t uplo, plasma_enum_t transa,
97  int m, int n,
98  const float *A, int lda,
99  float *B, int ldb);
100 
101 void core_slacpy_lapack2tile_band(plasma_enum_t uplo,
102  int it, int jt,
103  int m, int n, int nb, int kl, int ku,
104  const float *A, int lda,
105  float *B, int ldb);
106 
107 void core_slacpy_tile2lapack_band(plasma_enum_t uplo,
108  int it, int jt,
109  int m, int n, int nb, int kl, int ku,
110  const float *B, int ldb,
111  float *A, int lda);
112 
113 void core_slange(plasma_enum_t norm,
114  int m, int n,
115  const float *A, int lda,
116  float *work, float *result);
117 
118 void core_slansy(plasma_enum_t norm, plasma_enum_t uplo,
119  int n,
120  const float *A, int lda,
121  float *work, float *value);
122 
123 void core_slansy(plasma_enum_t norm, plasma_enum_t uplo,
124  int n,
125  const float *A, int lda,
126  float *work, float *value);
127 
128 void core_slantr(plasma_enum_t norm, plasma_enum_t uplo, plasma_enum_t diag,
129  int m, int n,
130  const float *A, int lda,
131  float *work, float *value);
132 
133 void core_slascl(plasma_enum_t uplo,
134  float cfrom, float cto,
135  int m, int n,
136  float *A, int lda);
137 
138 void core_slaset(plasma_enum_t uplo,
139  int m, int n,
140  float alpha, float beta,
141  float *A, int lda);
142 
143 void core_sgeswp(plasma_enum_t colrow,
144  plasma_desc_t A, int k1, int k2, const int *ipiv, int incx);
145 
146 void core_ssyswp(int uplo, plasma_desc_t A, int k1, int k2, const int *ipiv,
147  int incx);
148 
149 int core_slauum(plasma_enum_t uplo,
150  int n,
151  float *A, int lda);
152 
153 int core_spamm(int op, plasma_enum_t side, plasma_enum_t storev,
154  int m, int n, int k, int l,
155  const float *A1, int lda1,
156  float *A2, int lda2,
157  const float *V, int ldv,
158  float *W, int ldw);
159 
160 int core_sparfb(plasma_enum_t side, plasma_enum_t trans, plasma_enum_t direct,
161  plasma_enum_t storev,
162  int m1, int n1, int m2, int n2, int k, int l,
163  float *A1, int lda1,
164  float *A2, int lda2,
165  const float *V, int ldv,
166  const float *T, int ldt,
167  float *work, int ldwork);
168 
169 int core_spemv(plasma_enum_t trans, int storev,
170  int m, int n, int l,
171  float alpha,
172  const float *A, int lda,
173  const float *X, int incx,
174  float beta,
175  float *Y, int incy,
176  float *work);
177 
178 int core_spotrf(plasma_enum_t uplo,
179  int n,
180  float *A, int lda);
181 
182 void core_ssymm(plasma_enum_t side, plasma_enum_t uplo,
183  int m, int n,
184  float alpha, const float *A, int lda,
185  const float *B, int ldb,
186  float beta, float *C, int ldc);
187 
188 void core_ssyr2k(
189  plasma_enum_t uplo, plasma_enum_t trans,
190  int n, int k,
191  float alpha, const float *A, int lda,
192  const float *B, int ldb,
193  float beta, float *C, int ldc);
194 
195 void core_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
196  int n, int k,
197  float alpha, const float *A, int lda,
198  float beta, float *C, int ldc);
199 
200 int core_stradd(plasma_enum_t uplo, plasma_enum_t transa,
201  int m, int n,
202  float alpha, const float *A, int lda,
203  float beta, float *B, int ldb);
204 
205 void core_strmm(plasma_enum_t side, plasma_enum_t uplo,
206  plasma_enum_t transa, plasma_enum_t diag,
207  int m, int n,
208  float alpha, const float *A, int lda,
209  float *B, int ldb);
210 
211 void core_strsm(plasma_enum_t side, plasma_enum_t uplo,
212  plasma_enum_t transa, plasma_enum_t diag,
213  int m, int n,
214  float alpha, const float *A, int lda,
215  float *B, int ldb);
216 
217 void core_strssq(plasma_enum_t uplo, plasma_enum_t diag,
218  int m, int n,
219  const float *A, int lda,
220  float *scale, float *sumsq);
221 
222 int core_strtri(plasma_enum_t uplo, plasma_enum_t diag,
223  int n,
224  float *A, int lda);
225 
/**
 * core_stslqt -- the name suggests an LQ factorization kernel operating on a
 * pair of tiles (A1, A2); confirm against the definition.
 * A1, A2, T, tau and work are all non-const, so the callee may write through
 * any of them. Returns an int whose meaning is not specified here.
 */
int core_stslqt(int m, int n, int ib,
                float *A1, int lda1,
                float *A2, int lda2,
                float *T, int ldt,
                float *tau,
                float *work);
232 
233 int core_stsmlq(plasma_enum_t side, plasma_enum_t trans,
234  int m1, int n1, int m2, int n2, int k, int ib,
235  float *A1, int lda1,
236  float *A2, int lda2,
237  const float *V, int ldv,
238  const float *T, int ldt,
239  float *work, int ldwork);
240 
241 int core_stsmqr(plasma_enum_t side, plasma_enum_t trans,
242  int m1, int n1, int m2, int n2, int k, int ib,
243  float *A1, int lda1,
244  float *A2, int lda2,
245  const float *V, int ldv,
246  const float *T, int ldt,
247  float *work, int ldwork);
248 
/**
 * core_stsqrt -- the name suggests a QR factorization kernel operating on a
 * pair of tiles (A1, A2); confirm against the definition.
 * A1, A2, T, tau and work are all non-const, so the callee may write through
 * any of them. Returns an int whose meaning is not specified here.
 */
int core_stsqrt(int m, int n, int ib,
                float *A1, int lda1,
                float *A2, int lda2,
                float *T, int ldt,
                float *tau,
                float *work);
255 
/**
 * core_sttlqt -- the name suggests an LQ factorization kernel on a pair of
 * tiles where the second has triangular structure; confirm against the
 * definition.
 * A1, A2, T, tau and work are all non-const, so the callee may write through
 * any of them. Returns an int whose meaning is not specified here.
 */
int core_sttlqt(int m, int n, int ib,
                float *A1, int lda1,
                float *A2, int lda2,
                float *T, int ldt,
                float *tau,
                float *work);
262 
263 int core_sttmlq(plasma_enum_t side, plasma_enum_t trans,
264  int m1, int n1, int m2, int n2, int k, int ib,
265  float *A1, int lda1,
266  float *A2, int lda2,
267  const float *V, int ldv,
268  const float *T, int ldt,
269  float *work, int ldwork);
270 
271 int core_sttmqr(plasma_enum_t side, plasma_enum_t trans,
272  int m1, int n1, int m2, int n2, int k, int ib,
273  float *A1, int lda1,
274  float *A2, int lda2,
275  const float *V, int ldv,
276  const float *T, int ldt,
277  float *work, int ldwork);
278 
/**
 * core_sttqrt -- the name suggests a QR factorization kernel on a pair of
 * tiles where the second has triangular structure; confirm against the
 * definition.
 * A1, A2, T, tau and work are all non-const, so the callee may write through
 * any of them. Returns an int whose meaning is not specified here.
 */
int core_sttqrt(int m, int n, int ib,
                float *A1, int lda1,
                float *A2, int lda2,
                float *T, int ldt,
                float *tau,
                float *work);
285 
286 int core_sormlq(plasma_enum_t side, plasma_enum_t trans,
287  int m, int n, int k, int ib,
288  const float *A, int lda,
289  const float *T, int ldt,
290  float *C, int ldc,
291  float *work, int ldwork);
292 
293 int core_sormqr(plasma_enum_t side, plasma_enum_t trans,
294  int m, int n, int k, int ib,
295  const float *A, int lda,
296  const float *T, int ldt,
297  float *C, int ldc,
298  float *work, int ldwork);
299 
300 /******************************************************************************/
301 void core_omp_samax(int colrow, int m, int n,
302  const float *A, int lda,
303  float *values,
304  plasma_sequence_t *sequence, plasma_request_t *request);
305 
306 void core_omp_sgeadd(
307  plasma_enum_t transa, int m, int n,
308  float alpha, const float *A, int lda,
309  float beta, float *B, int ldb,
310  plasma_sequence_t *sequence, plasma_request_t *request);
311 
312 void core_omp_sgelqt(int m, int n, int ib,
313  float *A, int lda,
314  float *T, int ldt,
315  plasma_workspace_t work,
316  plasma_sequence_t *sequence, plasma_request_t *request);
317 
318 void core_omp_sgemm(
319  plasma_enum_t transa, plasma_enum_t transb,
320  int m, int n, int k,
321  float alpha, const float *A, int lda,
322  const float *B, int ldb,
323  float beta, float *C, int ldc,
324  plasma_sequence_t *sequence, plasma_request_t *request);
325 
326 void core_omp_sgeqrt(int m, int n, int ib,
327  float *A, int lda,
328  float *T, int ldt,
329  plasma_workspace_t work,
330  plasma_sequence_t *sequence, plasma_request_t *request);
331 
332 void core_omp_sgessq(int m, int n,
333  const float *A, int lda,
334  float *scale, float *sumsq,
335  plasma_sequence_t *sequence, plasma_request_t *request);
336 
337 void core_omp_sgessq_aux(int n,
338  const float *scale, const float *sumsq,
339  float *value,
340  plasma_sequence_t *sequence,
341  plasma_request_t *request);
342 
343 void core_omp_ssygst(int itype, plasma_enum_t uplo,
344  int n,
345  float *A, int lda,
346  float *B, int ldb,
347  plasma_sequence_t *sequence, plasma_request_t *request);
348 
349 void core_omp_ssymm(
350  plasma_enum_t side, plasma_enum_t uplo,
351  int m, int n,
352  float alpha, const float *A, int lda,
353  const float *B, int ldb,
354  float beta, float *C, int ldc,
355  plasma_sequence_t *sequence, plasma_request_t *request);
356 
357 void core_omp_ssyr2k(
358  plasma_enum_t uplo, plasma_enum_t trans,
359  int n, int k,
360  float alpha, const float *A, int lda,
361  const float *B, int ldb,
362  float beta, float *C, int ldc,
363  plasma_sequence_t *sequence, plasma_request_t *request);
364 
365 void core_omp_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
366  int n, int k,
367  float alpha, const float *A, int lda,
368  float beta, float *C, int ldc,
369  plasma_sequence_t *sequence, plasma_request_t *request);
370 
371 void core_omp_ssyssq(plasma_enum_t uplo,
372  int n,
373  const float *A, int lda,
374  float *scale, float *sumsq,
375  plasma_sequence_t *sequence, plasma_request_t *request);
376 
377 void core_omp_ssyssq(plasma_enum_t uplo,
378  int n,
379  const float *A, int lda,
380  float *scale, float *sumsq,
381  plasma_sequence_t *sequence, plasma_request_t *request);
382 
383 void core_omp_ssyssq_aux(int m, int n,
384  const float *scale, const float *sumsq,
385  float *value,
386  plasma_sequence_t *sequence,
387  plasma_request_t *request);
388 
389 void core_omp_slacpy(plasma_enum_t uplo, plasma_enum_t transa,
390  int m, int n,
391  const float *A, int lda,
392  float *B, int ldb,
393  plasma_sequence_t *sequence, plasma_request_t *request);
394 
395 void core_omp_slacpy_lapack2tile_band(plasma_enum_t uplo,
396  int it, int jt,
397  int m, int n, int nb, int kl, int ku,
398  const float *A, int lda,
399  float *B, int ldb);
400 
401 void core_omp_slacpy_tile2lapack_band(plasma_enum_t uplo,
402  int it, int jt,
403  int m, int n, int nb, int kl, int ku,
404  const float *B, int ldb,
405  float *A, int lda);
406 
407 void core_omp_slange(plasma_enum_t norm,
408  int m, int n,
409  const float *A, int lda,
410  float *work, float *result,
411  plasma_sequence_t *sequence, plasma_request_t *request);
412 
413 void core_omp_slange_aux(plasma_enum_t norm,
414  int m, int n,
415  const float *A, int lda,
416  float *value,
417  plasma_sequence_t *sequence,
418  plasma_request_t *request);
419 
420 void core_omp_slansy(plasma_enum_t norm, plasma_enum_t uplo,
421  int n,
422  const float *A, int lda,
423  float *work, float *value,
424  plasma_sequence_t *sequence, plasma_request_t *request);
425 
426 void core_omp_slansy_aux(plasma_enum_t norm, plasma_enum_t uplo,
427  int n,
428  const float *A, int lda,
429  float *value,
430  plasma_sequence_t *sequence,
431  plasma_request_t *request);
432 
433 void core_omp_slansy(plasma_enum_t norm, plasma_enum_t uplo,
434  int n,
435  const float *A, int lda,
436  float *work, float *value,
437  plasma_sequence_t *sequence, plasma_request_t *request);
438 
439 void core_omp_slansy_aux(plasma_enum_t norm, plasma_enum_t uplo,
440  int n,
441  const float *A, int lda,
442  float *value,
443  plasma_sequence_t *sequence,
444  plasma_request_t *request);
445 
446 void core_omp_slantr(plasma_enum_t norm, plasma_enum_t uplo, plasma_enum_t diag,
447  int m, int n,
448  const float *A, int lda,
449  float *work, float *value,
450  plasma_sequence_t *sequence, plasma_request_t *request);
451 
452 void core_omp_slantr_aux(plasma_enum_t norm, plasma_enum_t uplo,
453  plasma_enum_t diag,
454  int m, int n,
455  const float *A, int lda,
456  float *value,
457  plasma_sequence_t *sequence,
458  plasma_request_t *request);
459 
460 void core_omp_slascl(plasma_enum_t uplo,
461  float cfrom, float cto,
462  int m, int n,
463  float *A, int lda,
464  plasma_sequence_t *sequence, plasma_request_t *request);
465 
466 void core_omp_slaset(plasma_enum_t uplo,
467  int mb, int nb,
468  int i, int j,
469  int m, int n,
470  float alpha, float beta,
471  float *A);
472 
473 void core_omp_slauum(plasma_enum_t uplo,
474  int n,
475  float *A, int lda,
476  plasma_sequence_t *sequence, plasma_request_t *request);
477 
478 void core_omp_spotrf(plasma_enum_t uplo,
479  int n,
480  float *A, int lda,
481  int iinfo,
482  plasma_sequence_t *sequence, plasma_request_t *request);
483 
484 void core_omp_ssymm(
485  plasma_enum_t side, plasma_enum_t uplo,
486  int m, int n,
487  float alpha, const float *A, int lda,
488  const float *B, int ldb,
489  float beta, float *C, int ldc,
490  plasma_sequence_t *sequence, plasma_request_t *request);
491 
492 void core_omp_ssyr2k(
493  plasma_enum_t uplo, plasma_enum_t trans,
494  int n, int k,
495  float alpha, const float *A, int lda,
496  const float *B, int ldb,
497  float beta, float *C, int ldc,
498  plasma_sequence_t *sequence, plasma_request_t *request);
499 
500 void core_omp_ssyrk(
501  plasma_enum_t uplo, plasma_enum_t trans,
502  int n, int k,
503  float alpha, const float *A, int lda,
504  float beta, float *C, int ldc,
505  plasma_sequence_t *sequence, plasma_request_t *request);
506 
507 void core_omp_stradd(
508  plasma_enum_t uplo, plasma_enum_t transa,
509  int m, int n,
510  float alpha, const float *A, int lda,
511  float beta, float *B, int ldb,
512  plasma_sequence_t *sequence, plasma_request_t *request);
513 
514 void core_omp_strmm(
515  plasma_enum_t side, plasma_enum_t uplo,
516  plasma_enum_t transa, plasma_enum_t diag,
517  int m, int n,
518  float alpha, const float *A, int lda,
519  float *B, int ldb,
520  plasma_sequence_t *sequence, plasma_request_t *request);
521 
522 void core_omp_strsm(
523  plasma_enum_t side, plasma_enum_t uplo,
524  plasma_enum_t transa, plasma_enum_t diag,
525  int m, int n,
526  float alpha, const float *A, int lda,
527  float *B, int ldb,
528  plasma_sequence_t *sequence, plasma_request_t *request);
529 
530 void core_omp_strssq(plasma_enum_t uplo, plasma_enum_t diag,
531  int m, int n,
532  const float *A, int lda,
533  float *scale, float *sumsq,
534  plasma_sequence_t *sequence, plasma_request_t *request);
535 
536 void core_omp_strtri(plasma_enum_t uplo, plasma_enum_t diag,
537  int n,
538  float *A, int lda,
539  int iinfo,
540  plasma_sequence_t *sequence, plasma_request_t *request);
541 
542 void core_omp_stslqt(int m, int n, int ib,
543  float *A1, int lda1,
544  float *A2, int lda2,
545  float *T, int ldt,
546  plasma_workspace_t work,
547  plasma_sequence_t *sequence, plasma_request_t *request);
548 
549 void core_omp_stsmlq(plasma_enum_t side, plasma_enum_t trans,
550  int m1, int n1, int m2, int n2, int k, int ib,
551  float *A1, int lda1,
552  float *A2, int lda2,
553  const float *V, int ldv,
554  const float *T, int ldt,
555  plasma_workspace_t work,
556  plasma_sequence_t *sequence, plasma_request_t *request);
557 
558 void core_omp_stsmqr(plasma_enum_t side, plasma_enum_t trans,
559  int m1, int n1, int m2, int n2, int k, int ib,
560  float *A1, int lda1,
561  float *A2, int lda2,
562  const float *V, int ldv,
563  const float *T, int ldt,
564  plasma_workspace_t work,
565  plasma_sequence_t *sequence, plasma_request_t *request);
566 
567 void core_omp_stsqrt(int m, int n, int ib,
568  float *A1, int lda1,
569  float *A2, int lda2,
570  float *T, int ldt,
571  plasma_workspace_t work,
572  plasma_sequence_t *sequence, plasma_request_t *request);
573 
574 void core_omp_sttlqt(int m, int n, int ib,
575  float *A1, int lda1,
576  float *A2, int lda2,
577  float *T, int ldt,
578  plasma_workspace_t work,
579  plasma_sequence_t *sequence, plasma_request_t *request);
580 
581 void core_omp_sttmlq(plasma_enum_t side, plasma_enum_t trans,
582  int m1, int n1, int m2, int n2, int k, int ib,
583  float *A1, int lda1,
584  float *A2, int lda2,
585  const float *V, int ldv,
586  const float *T, int ldt,
587  plasma_workspace_t work,
588  plasma_sequence_t *sequence, plasma_request_t *request);
589 
590 void core_omp_sttmqr(plasma_enum_t side, plasma_enum_t trans,
591  int m1, int n1, int m2, int n2, int k, int ib,
592  float *A1, int lda1,
593  float *A2, int lda2,
594  const float *V, int ldv,
595  const float *T, int ldt,
596  plasma_workspace_t work,
597  plasma_sequence_t *sequence, plasma_request_t *request);
598 
599 void core_omp_sttqrt(int m, int n, int ib,
600  float *A1, int lda1,
601  float *A2, int lda2,
602  float *T, int ldt,
603  plasma_workspace_t work,
604  plasma_sequence_t *sequence, plasma_request_t *request);
605 
606 void core_omp_sormlq(plasma_enum_t side, plasma_enum_t trans,
607  int m, int n, int k, int ib,
608  const float *A, int lda,
609  const float *T, int ldt,
610  float *C, int ldc,
611  plasma_workspace_t work,
612  plasma_sequence_t *sequence, plasma_request_t *request);
613 
614 void core_omp_sormqr(plasma_enum_t side, plasma_enum_t trans,
615  int m, int n, int k, int ib,
616  const float *A, int lda,
617  const float *T, int ldt,
618  float *C, int ldc,
619  plasma_workspace_t work,
620  plasma_sequence_t *sequence, plasma_request_t *request);
621 
622 #undef REAL
623 
624 #ifdef __cplusplus
625 } // extern "C"
626 #endif
627 
628 #endif // ICL_CORE_BLAS_S_H
/* Definition: plasma_descriptor.h:40 */