Functions | |
void | magma_cgemv (magma_trans_t transA, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, const magmaFloatComplex *dA, magma_int_t ldda, const magmaFloatComplex *dx, magma_int_t incx, magmaFloatComplex beta, magmaFloatComplex *dy, magma_int_t incy) |
Perform matrix-vector product. | |
void | magma_cgerc (magma_int_t m, magma_int_t n, magmaFloatComplex alpha, const magmaFloatComplex *dx, magma_int_t incx, const magmaFloatComplex *dy, magma_int_t incy, magmaFloatComplex *dA, magma_int_t ldda) |
Perform rank-1 update, $A = \alpha x y^H + A$. | |
void | magma_cgeru (magma_int_t m, magma_int_t n, magmaFloatComplex alpha, const magmaFloatComplex *dx, magma_int_t incx, const magmaFloatComplex *dy, magma_int_t incy, magmaFloatComplex *dA, magma_int_t ldda) |
Perform rank-1 update (unconjugated), $A = \alpha x y^T + A$. | |
void | magma_chemv (magma_uplo_t uplo, magma_int_t n, magmaFloatComplex alpha, const magmaFloatComplex *dA, magma_int_t ldda, const magmaFloatComplex *dx, magma_int_t incx, magmaFloatComplex beta, magmaFloatComplex *dy, magma_int_t incy) |
Perform Hermitian matrix-vector product, $y = \alpha A x + \beta y$. | |
void | magma_cher (magma_uplo_t uplo, magma_int_t n, float alpha, const magmaFloatComplex *dx, magma_int_t incx, magmaFloatComplex *dA, magma_int_t ldda) |
Perform Hermitian rank-1 update, $A = \alpha x x^H + A$. | |
void | magma_cher2 (magma_uplo_t uplo, magma_int_t n, magmaFloatComplex alpha, const magmaFloatComplex *dx, magma_int_t incx, const magmaFloatComplex *dy, magma_int_t incy, magmaFloatComplex *dA, magma_int_t ldda) |
Perform Hermitian rank-2 update, $A = \alpha x y^H + \overline{\alpha} y x^H + A$. | |
void | magma_ctrmv (magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t n, const magmaFloatComplex *dA, magma_int_t ldda, magmaFloatComplex *dx, magma_int_t incx) |
Perform triangular matrix-vector product. | |
void | magma_ctrsv (magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t n, const magmaFloatComplex *dA, magma_int_t ldda, magmaFloatComplex *dx, magma_int_t incx) |
Solve triangular matrix-vector system (one right-hand side). | |
void | magmablas_cgemv_batched (magma_trans_t trans, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_ptr dA_array[], magma_int_t ldda, magmaFloatComplex_ptr dx_array[], magma_int_t incx, magmaFloatComplex beta, magmaFloatComplex_ptr dy_array[], magma_int_t incy, magma_int_t batchCount) |
This routine computes y = alpha op(A) x + beta y on the GPU, where A = dA_array[i], x = dx_array[i] and y = dy_array[i], for i = 0, ..., batchCount-1. | |
void | magmablas_cgemv_conjv (magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dx, magma_int_t incx, magmaFloatComplex beta, magmaFloatComplex_ptr dy, magma_int_t incy) |
CGEMV_CONJV performs the matrix-vector operation. | |
magma_int_t | magmablas_chemv_work (magma_uplo_t uplo, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dx, magma_int_t incx, magmaFloatComplex beta, magmaFloatComplex_ptr dy, magma_int_t incy, magmaFloatComplex_ptr dwork, magma_int_t lwork) |
magmablas_chemv_work performs the matrix-vector operation: | |
magma_int_t | magmablas_chemv (magma_uplo_t uplo, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dx, magma_int_t incx, magmaFloatComplex beta, magmaFloatComplex_ptr dy, magma_int_t incy) |
magmablas_chemv performs the matrix-vector operation: | |
magma_int_t | magmablas_chemv_mgpu_offset (magma_uplo_t uplo, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_ptr dA[], magma_int_t ldda, magmaFloatComplex_ptr dx[], magma_int_t incx, magmaFloatComplex beta, magmaFloatComplex_ptr dy[], magma_int_t incy, magmaFloatComplex_ptr dwork[], magma_int_t lwork, magma_int_t ngpu, magma_int_t nb, magma_int_t offset, magma_queue_t queues[][10]) |
magmablas_chemv_mgpu_offset performs the matrix-vector operation: | |
void | magmablas_cswapblk (magma_order_t order, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb, magma_int_t i1, magma_int_t i2, const magma_int_t *ipiv, magma_int_t inci, magma_int_t offset) |
magma_int_t | magmablas_csymv_work (magma_uplo_t uplo, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dx, magma_int_t incx, magmaFloatComplex beta, magmaFloatComplex_ptr dy, magma_int_t incy, magmaFloatComplex_ptr dwork, magma_int_t lwork) |
magmablas_csymv_work performs the matrix-vector operation: | |
magma_int_t | magmablas_csymv (magma_uplo_t uplo, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dx, magma_int_t incx, magmaFloatComplex beta, magmaFloatComplex_ptr dy, magma_int_t incy) |
magmablas_csymv performs the matrix-vector operation: |
void magma_cgemv | ( | magma_trans_t | transA, | |
magma_int_t | m, | |||
magma_int_t | n, | |||
magmaFloatComplex | alpha, | |||
const magmaFloatComplex * | dA, | |||
magma_int_t | ldda, | |||
const magmaFloatComplex * | dx, | |||
magma_int_t | incx, | |||
magmaFloatComplex | beta, | |||
magmaFloatComplex * | dy, | |||
magma_int_t | incy | |||
) |
Perform matrix-vector product.
$y = \alpha A x + \beta y$ (transA == MagmaNoTrans), or
$y = \alpha A^T x + \beta y$ (transA == MagmaTrans), or
$y = \alpha A^H x + \beta y$ (transA == MagmaConjTrans).
[in] | transA | Operation to perform on A. |
[in] | m | Number of rows of A. m >= 0. |
[in] | n | Number of columns of A. n >= 0. |
[in] | alpha | Scalar $\alpha$ |
[in] | dA | COMPLEX array of dimension (ldda,n), ldda >= max(1,m). The m-by-n matrix A, on GPU device. |
[in] | ldda | Leading dimension of dA. |
[in] | dx | COMPLEX array on GPU device. If transA == MagmaNoTrans, the n element vector x of dimension (1 + (n-1)*incx); otherwise, the m element vector x of dimension (1 + (m-1)*incx). |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
[in] | beta | Scalar $\beta$ |
[in,out] | dy | COMPLEX array on GPU device. If transA == MagmaNoTrans, the m element vector y of dimension (1 + (m-1)*incy); otherwise, the n element vector y of dimension (1 + (n-1)*incy). |
[in] | incy | Stride between consecutive elements of dy. incy != 0. |
void magma_cgerc | ( | magma_int_t | m, | |
magma_int_t | n, | |||
magmaFloatComplex | alpha, | |||
const magmaFloatComplex * | dx, | |||
magma_int_t | incx, | |||
const magmaFloatComplex * | dy, | |||
magma_int_t | incy, | |||
magmaFloatComplex * | dA, | |||
magma_int_t | ldda | |||
) |
Perform rank-1 update, $A = \alpha x y^H + A$.
[in] | m | Number of rows of A. m >= 0. |
[in] | n | Number of columns of A. n >= 0. |
[in] | alpha | Scalar $\alpha$ |
[in] | dx | COMPLEX array on GPU device. The m element vector x of dimension (1 + (m-1)*incx). |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
[in] | dy | COMPLEX array on GPU device. The n element vector y of dimension (1 + (n-1)*incy). |
[in] | incy | Stride between consecutive elements of dy. incy != 0. |
[in,out] | dA | COMPLEX array on GPU device. The m-by-n matrix A of dimension (ldda,n), ldda >= max(1,m). |
[in] | ldda | Leading dimension of dA. |
void magma_cgeru | ( | magma_int_t | m, | |
magma_int_t | n, | |||
magmaFloatComplex | alpha, | |||
const magmaFloatComplex * | dx, | |||
magma_int_t | incx, | |||
const magmaFloatComplex * | dy, | |||
magma_int_t | incy, | |||
magmaFloatComplex * | dA, | |||
magma_int_t | ldda | |||
) |
Perform rank-1 update (unconjugated), $A = \alpha x y^T + A$.
[in] | m | Number of rows of A. m >= 0. |
[in] | n | Number of columns of A. n >= 0. |
[in] | alpha | Scalar $\alpha$ |
[in] | dx | COMPLEX array on GPU device. The m element vector x of dimension (1 + (m-1)*incx). |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
[in] | dy | COMPLEX array on GPU device. The n element vector y of dimension (1 + (n-1)*incy). |
[in] | incy | Stride between consecutive elements of dy. incy != 0. |
[in,out] | dA | COMPLEX array of dimension (ldda,n), ldda >= max(1,m). The m-by-n matrix A, on GPU device. |
[in] | ldda | Leading dimension of dA. |
void magma_chemv | ( | magma_uplo_t | uplo, | |
magma_int_t | n, | |||
magmaFloatComplex | alpha, | |||
const magmaFloatComplex * | dA, | |||
magma_int_t | ldda, | |||
const magmaFloatComplex * | dx, | |||
magma_int_t | incx, | |||
magmaFloatComplex | beta, | |||
magmaFloatComplex * | dy, | |||
magma_int_t | incy | |||
) |
Perform Hermitian matrix-vector product, $y = \alpha A x + \beta y$.
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | n | Number of rows and columns of A. n >= 0. |
[in] | alpha | Scalar $\alpha$ |
[in] | dA | COMPLEX array of dimension (ldda,n), ldda >= max(1,n). The n-by-n matrix A, on GPU device. |
[in] | ldda | Leading dimension of dA. |
[in] | dx | COMPLEX array on GPU device. The n element vector x of dimension (1 + (n-1)*incx). |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
[in] | beta | Scalar $\beta$ |
[in,out] | dy | COMPLEX array on GPU device. The n element vector y of dimension (1 + (n-1)*incy). |
[in] | incy | Stride between consecutive elements of dy. incy != 0. |
void magma_cher | ( | magma_uplo_t | uplo, | |
magma_int_t | n, | |||
float | alpha, | |||
const magmaFloatComplex * | dx, | |||
magma_int_t | incx, | |||
magmaFloatComplex * | dA, | |||
magma_int_t | ldda | |||
) |
Perform Hermitian rank-1 update, $A = \alpha x x^H + A$.
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | n | Number of rows and columns of A. n >= 0. |
[in] | alpha | Scalar $\alpha$ |
[in] | dx | COMPLEX array on GPU device. The n element vector x of dimension (1 + (n-1)*incx). |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
[in,out] | dA | COMPLEX array of dimension (ldda,n), ldda >= max(1,n). The n-by-n matrix A, on GPU device. |
[in] | ldda | Leading dimension of dA. |
void magma_cher2 | ( | magma_uplo_t | uplo, | |
magma_int_t | n, | |||
magmaFloatComplex | alpha, | |||
const magmaFloatComplex * | dx, | |||
magma_int_t | incx, | |||
const magmaFloatComplex * | dy, | |||
magma_int_t | incy, | |||
magmaFloatComplex * | dA, | |||
magma_int_t | ldda | |||
) |
Perform Hermitian rank-2 update, $A = \alpha x y^H + \overline{\alpha} y x^H + A$.
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | n | Number of rows and columns of A. n >= 0. |
[in] | alpha | Scalar $\alpha$ |
[in] | dx | COMPLEX array on GPU device. The n element vector x of dimension (1 + (n-1)*incx). |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
[in] | dy | COMPLEX array on GPU device. The n element vector y of dimension (1 + (n-1)*incy). |
[in] | incy | Stride between consecutive elements of dy. incy != 0. |
[in,out] | dA | COMPLEX array of dimension (ldda,n), ldda >= max(1,n). The n-by-n matrix A, on GPU device. |
[in] | ldda | Leading dimension of dA. |
void magma_ctrmv | ( | magma_uplo_t | uplo, | |
magma_trans_t | trans, | |||
magma_diag_t | diag, | |||
magma_int_t | n, | |||
const magmaFloatComplex * | dA, | |||
magma_int_t | ldda, | |||
magmaFloatComplex * | dx, | |||
magma_int_t | incx | |||
) |
Perform triangular matrix-vector product.
$x = A x$ (trans == MagmaNoTrans), or
$x = A^T x$ (trans == MagmaTrans), or
$x = A^H x$ (trans == MagmaConjTrans).
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | trans | Operation to perform on A. |
[in] | diag | Whether the diagonal of A is assumed to be unit or non-unit. |
[in] | n | Number of rows and columns of A. n >= 0. |
[in] | dA | COMPLEX array of dimension (ldda,n), ldda >= max(1,n). The n-by-n matrix A, on GPU device. |
[in] | ldda | Leading dimension of dA. |
[in,out] | dx | COMPLEX array on GPU device. On entry, the n element vector x of dimension (1 + (n-1)*incx). On exit, overwritten with the product. |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
void magma_ctrsv | ( | magma_uplo_t | uplo, | |
magma_trans_t | trans, | |||
magma_diag_t | diag, | |||
magma_int_t | n, | |||
const magmaFloatComplex * | dA, | |||
magma_int_t | ldda, | |||
magmaFloatComplex * | dx, | |||
magma_int_t | incx | |||
) |
Solve triangular matrix-vector system (one right-hand side).
$A x = b$ (trans == MagmaNoTrans), or
$A^T x = b$ (trans == MagmaTrans), or
$A^H x = b$ (trans == MagmaConjTrans).
[in] | uplo | Whether the upper or lower triangle of A is referenced. |
[in] | trans | Operation to perform on A. |
[in] | diag | Whether the diagonal of A is assumed to be unit or non-unit. |
[in] | n | Number of rows and columns of A. n >= 0. |
[in] | dA | COMPLEX array of dimension (ldda,n), ldda >= max(1,n). The n-by-n matrix A, on GPU device. |
[in] | ldda | Leading dimension of dA. |
[in,out] | dx | COMPLEX array on GPU device. On entry, the n element RHS vector b of dimension (1 + (n-1)*incx). On exit, overwritten with the solution vector x. |
[in] | incx | Stride between consecutive elements of dx. incx != 0. |
void magmablas_cgemv_batched | ( | magma_trans_t | trans, | |
magma_int_t | m, | |||
magma_int_t | n, | |||
magmaFloatComplex | alpha, | |||
magmaFloatComplex_ptr | dA_array[], | |||
magma_int_t | ldda, | |||
magmaFloatComplex_ptr | dx_array[], | |||
magma_int_t | incx, | |||
magmaFloatComplex | beta, | |||
magmaFloatComplex_ptr | dy_array[], | |||
magma_int_t | incy, | |||
magma_int_t | batchCount | |||
) |
This routine computes y = alpha op(A) x + beta y on the GPU, where A = dA_array[i], x = dx_array[i] and y = dy_array[i], for i = 0, ..., batchCount-1.
This is a batched version.
[in] | trans | CHARACTER*1. On entry, TRANS specifies the form of op( A ) to be used in the matrix multiplication as follows: = 'N': op( A ) = A. = 'T': op( A ) = A**T. = 'C': op( A ) = A**H. |
[in] | m | INTEGER. On entry, M specifies the number of rows of the matrix A. |
[in] | n | INTEGER. On entry, N specifies the number of columns of the matrix A. |
[in] | alpha | COMPLEX. On entry, ALPHA specifies the scalar alpha. |
[in] | dA_array | A = dA_array[i]. A: COMPLEX array of dimension ( LDDA, n ) on the GPU. |
[in] | ldda | INTEGER. LDDA specifies the leading dimension of A. |
[in] | dx_array | x = dx_array[i]. x: COMPLEX array of dimension n if trans == MagmaNoTrans, or m if trans == MagmaTrans or MagmaConjTrans. |
[in] | incx | INTEGER. incx specifies the increment for the elements of x. incx must not be zero. |
[in] | beta | COMPLEX. On entry, BETA specifies the scalar beta. |
[in,out] | dy_array | y = dy_array[i]. On exit, y = alpha op(A) x + beta y. y: COMPLEX array of dimension m if trans == MagmaNoTrans, or n if trans == MagmaTrans or MagmaConjTrans. |
[in] | incy | INTEGER. incy specifies the increment for the elements of y. incy must not be zero. |
[in] | batchCount | INTEGER. The number of pointers contained in dA_array, dx_array and dy_array. |
void magmablas_cgemv_conjv | ( | magma_int_t | m, | |
magma_int_t | n, | |||
magmaFloatComplex | alpha, | |||
magmaFloatComplex_const_ptr | dA, | |||
magma_int_t | ldda, | |||
magmaFloatComplex_const_ptr | dx, | |||
magma_int_t | incx, | |||
magmaFloatComplex | beta, | |||
magmaFloatComplex_ptr | dy, | |||
magma_int_t | incy | |||
) |
CGEMV_CONJV performs the matrix-vector operation.
y := alpha*A*conj(x) + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
[in] | m | INTEGER On entry, m specifies the number of rows of the matrix A. |
[in] | n | INTEGER On entry, n specifies the number of columns of the matrix A |
[in] | alpha | COMPLEX On entry, ALPHA specifies the scalar alpha. |
[in] | dA | COMPLEX array of dimension ( LDDA, n ) on the GPU. |
[in] | ldda | INTEGER LDDA specifies the leading dimension of A. |
[in] | dx | COMPLEX array of dimension n |
[in] | incx | Specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | COMPLEX On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[in,out] | dy | COMPLEX array of dimension m. On exit, overwritten by the updated vector y. |
[in] | incy | Specifies the increment for the elements of Y. INCY must not be zero. |
magma_int_t magmablas_chemv | ( | magma_uplo_t | uplo, | |
magma_int_t | n, | |||
magmaFloatComplex | alpha, | |||
magmaFloatComplex_const_ptr | dA, | |||
magma_int_t | ldda, | |||
magmaFloatComplex_const_ptr | dx, | |||
magma_int_t | incx, | |||
magmaFloatComplex | beta, | |||
magmaFloatComplex_ptr | dy, | |||
magma_int_t | incy | |||
) |
magmablas_chemv performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A is an n by n Hermitian matrix.
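[in] | uplo | magma_uplo_t. On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced as follows: = MagmaUpper: only the upper triangular part of A is to be referenced. = MagmaLower: only the lower triangular part of A is to be referenced. |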
[in] | n | INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. |
[in] | alpha | COMPLEX. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | COMPLEX array of DIMENSION ( LDDA, n ). Before entry with UPLO = MagmaUpper, the leading n by n upper triangular part of the array A must contain the upper triangular part of the Hermitian matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = MagmaLower, the leading n by n lower triangular part of the array A must contain the lower triangular part of the Hermitian matrix and the strictly upper triangular part of A is not referenced. Note that the imaginary parts of the diagonal elements need not be set and are assumed to be zero. |
[in] | ldda | INTEGER. On entry, LDDA specifies the first dimension of A as declared in the calling (sub) program. LDDA must be at least max( 1, n ). It is recommended that ldda be a multiple of 16; otherwise performance deteriorates because the memory accesses are not fully coalesced. |
[in] | dx | COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. |
[in] | incx | INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | COMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[in,out] | dy | COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. On exit, Y is overwritten by the updated vector y. |
[in] | incy | INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. |
magma_int_t magmablas_chemv_mgpu_offset | ( | magma_uplo_t | uplo, | |
magma_int_t | n, | |||
magmaFloatComplex | alpha, | |||
magmaFloatComplex_ptr | dA[], | |||
magma_int_t | ldda, | |||
magmaFloatComplex_ptr | dx[], | |||
magma_int_t | incx, | |||
magmaFloatComplex | beta, | |||
magmaFloatComplex_ptr | dy[], | |||
magma_int_t | incy, | |||
magmaFloatComplex_ptr | dwork[], | |||
magma_int_t | lwork, | |||
magma_int_t | ngpu, | |||
magma_int_t | nb, | |||
magma_int_t | offset, | |||
magma_queue_t | queues[][10] | |||
) |
magmablas_chemv_mgpu_offset performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A is an n by n Hermitian matrix.
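[in] | uplo | magma_uplo_t. On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced as follows: = MagmaUpper: only the upper triangular part of A is to be referenced. = MagmaLower: only the lower triangular part of A is to be referenced. |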
[in] | n | INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. |
[in] | alpha | COMPLEX. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | COMPLEX array of DIMENSION ( LDDA, n ). Before entry with UPLO = MagmaUpper, the leading n by n upper triangular part of the array A must contain the upper triangular part of the Hermitian matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = MagmaLower, the leading n by n lower triangular part of the array A must contain the lower triangular part of the Hermitian matrix and the strictly upper triangular part of A is not referenced. Note that the imaginary parts of the diagonal elements need not be set and are assumed to be zero. |
[in] | ldda | INTEGER. On entry, LDDA specifies the first dimension of A as declared in the calling (sub) program. LDDA must be at least max( 1, n ). It is recommended that ldda be a multiple of 16; otherwise performance deteriorates because the memory accesses are not fully coalesced. |
[in] | dx | COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. |
[in] | incx | INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | COMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[in,out] | dy | COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. On exit, Y is overwritten by the updated vector y. |
[in] | incy | INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. |
magma_int_t magmablas_chemv_work | ( | magma_uplo_t | uplo, | |
magma_int_t | n, | |||
magmaFloatComplex | alpha, | |||
magmaFloatComplex_const_ptr | dA, | |||
magma_int_t | ldda, | |||
magmaFloatComplex_const_ptr | dx, | |||
magma_int_t | incx, | |||
magmaFloatComplex | beta, | |||
magmaFloatComplex_ptr | dy, | |||
magma_int_t | incy, | |||
magmaFloatComplex_ptr | dwork, | |||
magma_int_t | lwork | |||
) |
magmablas_chemv_work performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A is an n by n Hermitian matrix.
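[in] | uplo | magma_uplo_t. On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced as follows: = MagmaUpper: only the upper triangular part of A is to be referenced. = MagmaLower: only the lower triangular part of A is to be referenced. |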
[in] | n | INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. |
[in] | alpha | COMPLEX. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | COMPLEX array of DIMENSION ( LDDA, n ). Before entry with UPLO = MagmaUpper, the leading n by n upper triangular part of the array A must contain the upper triangular part of the Hermitian matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = MagmaLower, the leading n by n lower triangular part of the array A must contain the lower triangular part of the Hermitian matrix and the strictly upper triangular part of A is not referenced. Note that the imaginary parts of the diagonal elements need not be set and are assumed to be zero. |
[in] | ldda | INTEGER. On entry, LDDA specifies the first dimension of A as declared in the calling (sub) program. LDDA must be at least max( 1, n ). It is recommended that ldda be a multiple of 16; otherwise performance deteriorates because the memory accesses are not fully coalesced. |
[in] | dx | COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. |
[in] | incx | INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | COMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[in,out] | dy | COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. On exit, Y is overwritten by the updated vector y. |
[in] | incy | INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. |
[in] | dwork | (workspace) COMPLEX array on the GPU, dimension (MAX(1, LWORK)). |
[in] | lwork | INTEGER. The dimension of the array DWORK. LWORK >= LDDA * ceil( N / NB_X ), where NB_X = 64. |
MAGMA implements chemv through two steps: 1) perform the multiplication in each thread block and put the intermediate value in dwork. 2) sum the intermediate values and store the final result in y.
magmablas_chemv_work requires users to provide a workspace, while magmablas_chemv is a wrapper routine that allocates the workspace internally and provides the same interface as cuBLAS.
If users need to call chemv frequently, we suggest using magmablas_chemv_work instead of magmablas_chemv, because the overhead of allocating and freeing device memory in magmablas_chemv hurts performance. Our tests show that this penalty is about 10 Gflop/s when the matrix size is around 10000.
void magmablas_cswapblk | ( | magma_order_t | order, | |
magma_int_t | n, | |||
magmaFloatComplex_ptr | dA, | |||
magma_int_t | ldda, | |||
magmaFloatComplex_ptr | dB, | |||
magma_int_t | lddb, | |||
magma_int_t | i1, | |||
magma_int_t | i2, | |||
const magma_int_t * | ipiv, | |||
magma_int_t | inci, | |||
magma_int_t | offset | |||
) |
magma_int_t magmablas_csymv | ( | magma_uplo_t | uplo, | |
magma_int_t | n, | |||
magmaFloatComplex | alpha, | |||
magmaFloatComplex_const_ptr | dA, | |||
magma_int_t | ldda, | |||
magmaFloatComplex_const_ptr | dx, | |||
magma_int_t | incx, | |||
magmaFloatComplex | beta, | |||
magmaFloatComplex_ptr | dy, | |||
magma_int_t | incy | |||
) |
magmablas_csymv performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A is an n by n complex symmetric matrix.
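[in] | uplo | magma_uplo_t. On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced as follows: = MagmaUpper: only the upper triangular part of A is to be referenced. = MagmaLower: only the lower triangular part of A is to be referenced. |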
[in] | n | INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. |
[in] | alpha | COMPLEX. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | COMPLEX array of DIMENSION ( LDDA, n ). Before entry with UPLO = MagmaUpper, the leading n by n upper triangular part of the array A must contain the upper triangular part of the symmetric matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = MagmaLower, the leading n by n lower triangular part of the array A must contain the lower triangular part of the symmetric matrix and the strictly upper triangular part of A is not referenced. Note that the imaginary parts of the diagonal elements need not be set and are assumed to be zero. |
[in] | ldda | INTEGER. On entry, LDDA specifies the first dimension of A as declared in the calling (sub) program. LDDA must be at least max( 1, n ). It is recommended that ldda be a multiple of 16; otherwise performance deteriorates because the memory accesses are not fully coalesced. |
[in] | dx | COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. |
[in] | incx | INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | COMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[in,out] | dy | COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. On exit, Y is overwritten by the updated vector y. |
[in] | incy | INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. |
magma_int_t magmablas_csymv_work | ( | magma_uplo_t | uplo, | |
magma_int_t | n, | |||
magmaFloatComplex | alpha, | |||
magmaFloatComplex_const_ptr | dA, | |||
magma_int_t | ldda, | |||
magmaFloatComplex_const_ptr | dx, | |||
magma_int_t | incx, | |||
magmaFloatComplex | beta, | |||
magmaFloatComplex_ptr | dy, | |||
magma_int_t | incy, | |||
magmaFloatComplex_ptr | dwork, | |||
magma_int_t | lwork | |||
) |
magmablas_csymv_work performs the matrix-vector operation:
y := alpha*A*x + beta*y,
where alpha and beta are scalars, x and y are n element vectors and A is an n by n complex symmetric matrix.
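[in] | uplo | magma_uplo_t. On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced as follows: = MagmaUpper: only the upper triangular part of A is to be referenced. = MagmaLower: only the lower triangular part of A is to be referenced. |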
[in] | n | INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. |
[in] | alpha | COMPLEX. On entry, ALPHA specifies the scalar alpha. |
[in] | dA | COMPLEX array of DIMENSION ( LDDA, n ). Before entry with UPLO = MagmaUpper, the leading n by n upper triangular part of the array A must contain the upper triangular part of the symmetric matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = MagmaLower, the leading n by n lower triangular part of the array A must contain the lower triangular part of the symmetric matrix and the strictly upper triangular part of A is not referenced. Note that the imaginary parts of the diagonal elements need not be set and are assumed to be zero. |
[in] | ldda | INTEGER. On entry, LDDA specifies the first dimension of A as declared in the calling (sub) program. LDDA must be at least max( 1, n ). It is recommended that ldda be a multiple of 16; otherwise performance deteriorates because the memory accesses are not fully coalesced. |
[in] | dx | COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. |
[in] | incx | INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. |
[in] | beta | COMPLEX. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. |
[in,out] | dy | COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. On exit, Y is overwritten by the updated vector y. |
[in] | incy | INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. |
[in] | dwork | (workspace) COMPLEX array on the GPU, dimension (MAX(1, LWORK)). |
[in] | lwork | INTEGER. The dimension of the array DWORK. LWORK >= LDDA * ceil( N / NB_X ), where NB_X = 64. |
MAGMA implements csymv through two steps: 1) perform the multiplication in each thread block and put the intermediate value in dwork. 2) sum the intermediate values and store the final result in y.
magmablas_csymv_work requires users to provide a workspace, while magmablas_csymv is a wrapper routine that allocates the workspace internally and provides the same interface as cuBLAS.
If users need to call csymv frequently, we suggest using magmablas_csymv_work instead of magmablas_csymv, because the overhead of allocating and freeing device memory in magmablas_csymv hurts performance. Our tests show that this penalty is about 10 Gflop/s when the matrix size is around 10000.