OpenNL: modify SuperLU to use doubles rather than floats, for better precision.

This helps to improve the accuracy of UV unwrapping and laplacian deform for
high poly meshes, which could get warped quite badly. It's not much slower,
doubles are pretty fast on modern CPUs, but it does double memory usage. This
seems acceptable as otherwise high poly meshes would not work correctly anyway.

Fixes T39004.
This commit is contained in:
Brecht Van Lommel 2014-09-20 19:51:34 +02:00
parent 87c27ef92f
commit 0b12e61040
Notes: blender-bot 2023-02-14 11:03:49 +01:00
Referenced by issue #39004, Laplacian Deform incorrect with > 10K faces
21 changed files with 311 additions and 315 deletions

View File

@ -100,11 +100,11 @@ NLContext nlGetCurrent(void);
/* State get/set */
void nlSolverParameterf(NLenum pname, NLfloat param);
void nlSolverParameterf(NLenum pname, NLdouble param);
void nlSolverParameteri(NLenum pname, NLint param);
void nlGetBooleanv(NLenum pname, NLboolean* params);
void nlGetFloatv(NLenum pname, NLfloat* params);
void nlGetFloatv(NLenum pname, NLdouble* params);
void nlGetIntergerv(NLenum pname, NLint* params);
void nlEnable(NLenum pname);
@ -113,8 +113,8 @@ NLboolean nlIsEnabled(NLenum pname);
/* Variables */
void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value);
NLfloat nlGetVariable(NLuint rhsindex, NLuint index);
void nlSetVariable(NLuint rhsindex, NLuint index, NLdouble value);
NLdouble nlGetVariable(NLuint rhsindex, NLuint index);
void nlLockVariable(NLuint index);
void nlUnlockVariable(NLuint index);
NLboolean nlVariableIsLocked(NLuint index);
@ -126,13 +126,9 @@ void nlEnd(NLenum primitive);
/* Setting elements in matrix/vector */
void nlMatrixAdd(NLuint row, NLuint col, NLfloat value);
void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value);
void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLfloat value);
/* Multiply */
void nlMatrixMultiply(NLfloat *x, NLfloat *y);
void nlMatrixAdd(NLuint row, NLuint col, NLdouble value);
void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLdouble value);
void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLdouble value);
/* Solve */

View File

@ -69,7 +69,7 @@ static void __nl_assertion_failed(char* cond, char* file, int line) {
}
static void __nl_range_assertion_failed(
float x, float min_val, float max_val, char* file, int line
double x, double min_val, double max_val, char* file, int line
) {
fprintf(
stderr,
@ -151,7 +151,7 @@ static void __nl_should_not_have_reached(char* file, int line) {
typedef struct {
NLuint index;
NLfloat value;
NLdouble value;
} __NLCoeff;
typedef struct {
@ -183,7 +183,7 @@ static void __nlRowColumnGrow(__NLRowColumn* c) {
}
}
static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLdouble value) {
NLuint i;
for(i=0; i<c->size; i++) {
if(c->coeff[i].index == (NLuint)index) {
@ -200,7 +200,7 @@ static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
}
/* Does not check whether the index already exists */
static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) {
static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLdouble value) {
if(c->size == c->capacity) {
__nlRowColumnGrow(c);
}
@ -229,7 +229,7 @@ typedef struct {
NLenum storage;
__NLRowColumn* row;
__NLRowColumn* column;
NLfloat* diag;
NLdouble* diag;
} __NLSparseMatrix;
@ -259,7 +259,7 @@ static void __nlSparseMatrixConstruct(
}
M->diag_size = MIN(m,n);
M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size);
M->diag = __NL_NEW_ARRAY(NLdouble, M->diag_size);
}
static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
@ -283,7 +283,7 @@ static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
}
static void __nlSparseMatrixAdd(
__NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value
__NLSparseMatrix* M, NLuint i, NLuint j, NLdouble value
) {
__nl_parano_range_assert(i, 0, M->m - 1);
__nl_parano_range_assert(j, 0, M->n - 1);
@ -313,7 +313,7 @@ static void __nlSparseMatrixClear( __NLSparseMatrix* M) {
__nlRowColumnClear(&(M->column[i]));
}
}
__NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size);
__NL_CLEAR_ARRAY(NLdouble, M->diag, M->diag_size);
}
/* Returns the number of non-zero coefficients */
@ -338,7 +338,7 @@ static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) {
/* SparseMatrix x Vector routines, internal helper routines */
static void __nlSparseMatrix_mult_rows_symmetric(
__NLSparseMatrix* A, NLfloat* x, NLfloat* y
__NLSparseMatrix* A, NLdouble* x, NLdouble* y
) {
NLuint m = A->m;
NLuint i,ij;
@ -358,7 +358,7 @@ static void __nlSparseMatrix_mult_rows_symmetric(
}
static void __nlSparseMatrix_mult_rows(
__NLSparseMatrix* A, NLfloat* x, NLfloat* y
__NLSparseMatrix* A, NLdouble* x, NLdouble* y
) {
NLuint m = A->m;
NLuint i,ij;
@ -375,7 +375,7 @@ static void __nlSparseMatrix_mult_rows(
}
static void __nlSparseMatrix_mult_cols_symmetric(
__NLSparseMatrix* A, NLfloat* x, NLfloat* y
__NLSparseMatrix* A, NLdouble* x, NLdouble* y
) {
NLuint n = A->n;
NLuint j,ii;
@ -395,13 +395,13 @@ static void __nlSparseMatrix_mult_cols_symmetric(
}
static void __nlSparseMatrix_mult_cols(
__NLSparseMatrix* A, NLfloat* x, NLfloat* y
__NLSparseMatrix* A, NLdouble* x, NLdouble* y
) {
NLuint n = A->n;
NLuint j,ii;
__NLRowColumn* Cj = NULL;
__NLCoeff* c = NULL;
__NL_CLEAR_ARRAY(NLfloat, y, A->m);
__NL_CLEAR_ARRAY(NLdouble, y, A->m);
for(j=0; j<n; j++) {
Cj = &(A->column[j]);
for(ii=0; ii<Cj->size; ii++) {
@ -414,7 +414,7 @@ static void __nlSparseMatrix_mult_cols(
/************************************************************************************/
/* SparseMatrix x Vector routines, main driver routine */
static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) {
static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLdouble* x, NLdouble* y) {
if(A->storage & __NL_ROWS) {
if(A->storage & __NL_SYMMETRIC) {
__nlSparseMatrix_mult_rows_symmetric(A, x, y);
@ -440,7 +440,7 @@ static void __nlSparseMatrix_square(
NLuint i, j0, j1;
__NLRowColumn *Ri = NULL;
__NLCoeff *c0 = NULL, *c1 = NULL;
float value;
double value;
__nlSparseMatrixConstruct(AtA, n, n, A->storage);
@ -460,7 +460,7 @@ static void __nlSparseMatrix_square(
}
static void __nlSparseMatrix_transpose_mult_rows(
__NLSparseMatrix* A, NLfloat* x, NLfloat* y
__NLSparseMatrix* A, NLdouble* x, NLdouble* y
) {
NLuint m = A->m;
NLuint n = A->n;
@ -468,7 +468,7 @@ static void __nlSparseMatrix_transpose_mult_rows(
__NLRowColumn* Ri = NULL;
__NLCoeff* c = NULL;
__NL_CLEAR_ARRAY(NLfloat, y, n);
__NL_CLEAR_ARRAY(NLdouble, y, n);
for(i=0; i<m; i++) {
Ri = &(A->row[i]);
@ -482,10 +482,10 @@ static void __nlSparseMatrix_transpose_mult_rows(
/************************************************************************************/
/* NLContext data structure */
typedef void(*__NLMatrixFunc)(float* x, float* y);
typedef void(*__NLMatrixFunc)(double* x, double* y);
typedef struct {
NLfloat value[4];
NLdouble value[4];
NLboolean locked;
NLuint index;
__NLRowColumn *a;
@ -503,11 +503,11 @@ typedef struct {
NLuint n;
NLuint m;
__NLVariable* variable;
NLfloat* b;
NLfloat* Mtb;
NLdouble* b;
NLdouble* Mtb;
__NLSparseMatrix M;
__NLSparseMatrix MtM;
NLfloat* x;
NLdouble* x;
NLuint nb_variables;
NLuint nb_rows;
NLboolean least_squares;
@ -520,7 +520,7 @@ typedef struct {
NLboolean alloc_x;
NLboolean alloc_b;
NLboolean alloc_Mtb;
NLfloat error;
NLdouble error;
__NLMatrixFunc matrix_vector_prod;
struct __NLSuperLUContext {
@ -533,7 +533,7 @@ typedef struct {
static __NLContext* __nlCurrentContext = NULL;
static void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) {
static void __nlMatrixVectorProd_default(NLdouble* x, NLdouble* y) {
__nlSparseMatrixMult(&(__nlCurrentContext->M), x, y);
}
@ -611,7 +611,7 @@ static void __nlTransition(NLenum from_state, NLenum to_state) {
/************************************************************************************/
/* Get/Set parameters */
void nlSolverParameterf(NLenum pname, NLfloat param) {
void nlSolverParameterf(NLenum pname, NLdouble param) {
__nlCheckState(__NL_STATE_INITIAL);
switch(pname) {
case NL_NB_VARIABLES: {
@ -677,22 +677,22 @@ void nlGetBooleanv(NLenum pname, NLboolean* params) {
}
}
void nlGetFloatv(NLenum pname, NLfloat* params) {
void nlGetFloatv(NLenum pname, NLdouble* params) {
switch(pname) {
case NL_NB_VARIABLES: {
*params = (NLfloat)(__nlCurrentContext->nb_variables);
*params = (NLdouble)(__nlCurrentContext->nb_variables);
} break;
case NL_NB_ROWS: {
*params = (NLfloat)(__nlCurrentContext->nb_rows);
*params = (NLdouble)(__nlCurrentContext->nb_rows);
} break;
case NL_LEAST_SQUARES: {
*params = (NLfloat)(__nlCurrentContext->least_squares);
*params = (NLdouble)(__nlCurrentContext->least_squares);
} break;
case NL_SYMMETRIC: {
*params = (NLfloat)(__nlCurrentContext->symmetric);
*params = (NLdouble)(__nlCurrentContext->symmetric);
} break;
case NL_ERROR: {
*params = (NLfloat)(__nlCurrentContext->error);
*params = (NLdouble)(__nlCurrentContext->error);
} break;
default: {
__nl_assert_not_reached;
@ -751,13 +751,13 @@ NLboolean nlIsEnabled(NLenum pname) {
/************************************************************************************/
/* Get/Set Lock/Unlock variables */
void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value) {
void nlSetVariable(NLuint rhsindex, NLuint index, NLdouble value) {
__nlCheckState(__NL_STATE_SYSTEM);
__nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
__nlCurrentContext->variable[index].value[rhsindex] = value;
}
NLfloat nlGetVariable(NLuint rhsindex, NLuint index) {
NLdouble nlGetVariable(NLuint rhsindex, NLuint index) {
__nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
__nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
return __nlCurrentContext->variable[index].value[rhsindex];
@ -870,15 +870,15 @@ static void __nlBeginMatrix() {
__nlSparseMatrixConstruct(&context->M, m, n, storage);
context->alloc_M = NL_TRUE;
context->b = __NL_NEW_ARRAY(NLfloat, m*context->nb_rhs);
context->b = __NL_NEW_ARRAY(NLdouble, m*context->nb_rhs);
context->alloc_b = NL_TRUE;
context->x = __NL_NEW_ARRAY(NLfloat, n*context->nb_rhs);
context->x = __NL_NEW_ARRAY(NLdouble, n*context->nb_rhs);
context->alloc_x = NL_TRUE;
}
else {
/* need to recompute b only, A is not constructed anymore */
__NL_CLEAR_ARRAY(NLfloat, context->b, context->m*context->nb_rhs);
__NL_CLEAR_ARRAY(NLdouble, context->b, context->m*context->nb_rhs);
}
__nlVariablesToVector();
@ -888,7 +888,7 @@ static void __nlEndMatrixRHS(NLuint rhs) {
__NLContext *context = __nlCurrentContext;
__NLVariable *variable;
__NLRowColumn *a;
NLfloat *b, *Mtb;
NLdouble *b, *Mtb;
NLuint i, j;
b = context->b + context->m*rhs;
@ -922,7 +922,7 @@ static void __nlEndMatrix() {
context->alloc_MtM = NL_TRUE;
context->Mtb =
__NL_NEW_ARRAY(NLfloat, context->n*context->nb_rhs);
__NL_NEW_ARRAY(NLdouble, context->n*context->nb_rhs);
context->alloc_Mtb = NL_TRUE;
}
}
@ -931,7 +931,7 @@ static void __nlEndMatrix() {
__nlEndMatrixRHS(i);
}
void nlMatrixAdd(NLuint row, NLuint col, NLfloat value)
void nlMatrixAdd(NLuint row, NLuint col, NLdouble value)
{
__NLContext *context = __nlCurrentContext;
@ -960,10 +960,10 @@ void nlMatrixAdd(NLuint row, NLuint col, NLfloat value)
}
}
void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value)
void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLdouble value)
{
__NLContext *context = __nlCurrentContext;
NLfloat* b = context->b;
NLdouble* b = context->b;
__nlCheckState(__NL_STATE_MATRIX);
@ -981,10 +981,10 @@ void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value)
}
}
void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLfloat value)
void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLdouble value)
{
__NLContext *context = __nlCurrentContext;
NLfloat* b = context->b;
NLdouble* b = context->b;
__nlCheckState(__NL_STATE_MATRIX);
@ -1047,8 +1047,8 @@ static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation)
/* Compressed Row Storage matrix representation */
NLint *xa = __NL_NEW_ARRAY(NLint, n+1);
NLfloat *rhs = __NL_NEW_ARRAY(NLfloat, n);
NLfloat *a = __NL_NEW_ARRAY(NLfloat, nnz);
NLdouble *rhs = __NL_NEW_ARRAY(NLdouble, n);
NLdouble *a = __NL_NEW_ARRAY(NLdouble, nnz);
NLint *asub = __NL_NEW_ARRAY(NLint, nnz);
NLint *etree = __NL_NEW_ARRAY(NLint, n);
@ -1083,7 +1083,7 @@ static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation)
sCreate_CompCol_Matrix(
&At, n, n, nnz, a, asub, xa,
SLU_NC, /* Colum wise, no supernode */
SLU_S, /* floats */
SLU_S, /* doubles */
SLU_GE /* general storage */
);
@ -1136,8 +1136,8 @@ static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation)
static NLboolean __nlInvert_SUPERLU(__NLContext *context) {
/* OpenNL Context */
NLfloat* b = (context->least_squares)? context->Mtb: context->b;
NLfloat* x = context->x;
NLdouble* b = (context->least_squares)? context->Mtb: context->b;
NLdouble* x = context->x;
NLuint n = context->n, j;
/* SuperLU variables */
@ -1149,7 +1149,7 @@ static NLboolean __nlInvert_SUPERLU(__NLContext *context) {
sCreate_Dense_Matrix(
&B, n, 1, b, n,
SLU_DN, /* Fortran-type column-wise storage */
SLU_S, /* floats */
SLU_S, /* doubles */
SLU_GE /* general */
);
@ -1184,12 +1184,12 @@ void nlPrintMatrix(void) {
__NLContext *context = __nlCurrentContext;
__NLSparseMatrix* M = &(context->M);
__NLSparseMatrix* MtM = &(context->MtM);
float *b = context->b;
double *b = context->b;
NLuint i, jj, k;
NLuint m = context->m;
NLuint n = context->n;
__NLRowColumn* Ri = NULL;
float *value = malloc(sizeof(*value)*(n+m));
double *value = malloc(sizeof(*value)*(n+m));
printf("A:\n");
for(i=0; i<m; i++) {

View File

@ -29,9 +29,9 @@
/*
* Function prototypes
*/
void susolve(int, int, float*, float*);
void slsolve(int, int, float*, float*);
void smatvec(int, int, int, float*, float*, float*);
void susolve(int, int, double*, double*);
void slsolve(int, int, double*, double*);
void smatvec(int, int, int, double*, double*, double*);
@ -42,8 +42,8 @@ int
scolumn_bmod (
const int jcol, /* in */
const int nseg, /* in */
float *dense, /* in */
float *tempv, /* working array */
double *dense, /* in */
double *tempv, /* working array */
int *segrep, /* in */
int *repfnz, /* in */
int fpanelc, /* in -- first column in the current panel */
@ -67,7 +67,7 @@ scolumn_bmod (
#ifdef USE_VENDOR_BLAS
int incx = 1, incy = 1;
float alpha, beta;
double alpha, beta;
#endif
/* krep = representative of current k-th supernode
@ -78,7 +78,7 @@ scolumn_bmod (
* kfnz = first nonz in the k-th supernodal segment
* no_zeros = no of leading zeros in a supernodal U-segment
*/
float ukj, ukj1, ukj2;
double ukj, ukj1, ukj2;
int luptr, luptr1, luptr2;
int fsupc, nsupc, nsupr, segsze;
int nrow; /* No of rows in the matrix of matrix-vector */
@ -91,14 +91,14 @@ scolumn_bmod (
panel and the first column of the current snode. */
int *xsup, *supno;
int *lsub, *xlsub;
float *lusup;
double *lusup;
int *xlusup;
int nzlumax;
float *tempv1;
float zero = 0.0;
double *tempv1;
double zero = 0.0;
#ifdef USE_VENDOR_BLAS
float one = 1.0;
float none = -1.0;
double one = 1.0;
double none = -1.0;
#endif
int mem_error;
flops_t *ops = stat->ops;

View File

@ -33,7 +33,7 @@ scopy_to_ucol(
int *segrep, /* in */
int *repfnz, /* in */
int *perm_r, /* in */
float *dense, /* modified - reset to zero on return */
double *dense, /* modified - reset to zero on return */
GlobalLU_t *Glu /* modified */
)
{
@ -47,11 +47,11 @@ scopy_to_ucol(
int new_next, mem_error;
int *xsup, *supno;
int *lsub, *xlsub;
float *ucol;
double *ucol;
int *usub, *xusub;
int nzumax;
float zero = 0.0;
double zero = 0.0;
xsup = Glu->xsup;
supno = Glu->supno;

View File

@ -119,7 +119,7 @@ sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
* On exit, the solution matrix if info = 0;
*
* stat (output) SuperLUStat_t*
* Record the statistics on runtime and floating-point operation count.
* Record the statistics on runtime and doubleing-point operation count.
* See util.h for the definition of 'SuperLUStat_t'.
*
* info (output) int*

View File

@ -56,7 +56,7 @@ sgstrf (superlu_options_t *options, SuperMatrix *A,
* (A->nrow, A->ncol). The type of A can be:
* Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
*
* drop_tol (input) float (NOT IMPLEMENTED)
* drop_tol (input) double (NOT IMPLEMENTED)
* Drop tolerance parameter. At step j of the Gaussian elimination,
* if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
* 0 <= drop_tol <= 1. The default value of drop_tol is 0.
@ -119,7 +119,7 @@ sgstrf (superlu_options_t *options, SuperMatrix *A,
* Dtype = SLU_S, Mtype = SLU_TRU.
*
* stat (output) SuperLUStat_t*
* Record the statistics on runtime and floating-point operation count.
* Record the statistics on runtime and doubleing-point operation count.
* See util.h for the definition of 'SuperLUStat_t'.
*
* info (output) int*
@ -189,14 +189,14 @@ sgstrf (superlu_options_t *options, SuperMatrix *A,
used when options->Fact == SamePattern_SameRowPerm */
int *iperm_c; /* inverse of perm_c */
int *iwork;
float *swork;
double *swork;
int *segrep, *repfnz, *parent, *xplore;
int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
int *xprune;
int *marker;
float *dense, *tempv;
double *dense, *tempv;
int *relax_end;
float *a;
double *a;
int *asub;
int *xa_begin, *xa_end;
int *xsup, *supno;

View File

@ -28,10 +28,10 @@
/*
* Function prototypes
*/
void susolve(int, int, float*, float*);
void slsolve(int, int, float*, float*);
void smatvec(int, int, int, float*, float*, float*);
void sprint_soln(int , float *);
void susolve(int, int, double*, double*);
void slsolve(int, int, double*, double*);
void smatvec(int, int, int, double*, double*, double*);
void sprint_soln(int , double *);
void
sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
@ -82,7 +82,7 @@ sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
* On exit, the solution matrix if info = 0;
*
* stat (output) SuperLUStat_t*
* Record the statistics on runtime and floating-point operation count.
* Record the statistics on runtime and doubleing-point operation count.
* See util.h for the definition of 'SuperLUStat_t'.
*
* info (output) int*
@ -94,17 +94,17 @@ sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
_fcd ftcs1, ftcs2, ftcs3, ftcs4;
#endif
#ifdef USE_VENDOR_BLAS
float alpha = 1.0, beta = 1.0;
float *work_col;
double alpha = 1.0, beta = 1.0;
double *work_col;
#endif
DNformat *Bstore;
float *Bmat;
double *Bmat;
SCformat *Lstore;
NCformat *Ustore;
float *Lval, *Uval;
double *Lval, *Uval;
int fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
int i, j, k, iptr, jcol, n, ldb, nrhs;
float *work, *rhs_work, *soln;
double *work, *rhs_work, *soln;
flops_t solve_ops;
void sprint_soln();
@ -130,9 +130,9 @@ sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
}
n = L->nrow;
work = floatCalloc(n * nrhs);
work = doubleCalloc(n * nrhs);
if ( !work ) ABORT("Malloc fails for local work[].");
soln = floatMalloc(n);
soln = doubleMalloc(n);
if ( !soln ) ABORT("Malloc fails for local soln[].");
Bmat = Bstore->nzval;
@ -325,7 +325,7 @@ sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
* Diagnostic print of the solution vector
*/
void
sprint_soln(int n, float *soln)
sprint_soln(int n, double *soln)
{
int i;

View File

@ -24,8 +24,8 @@
/* Internal prototypes */
void *sexpand (int *, MemType,int, int, GlobalLU_t *);
int sLUWorkInit (int, int, int, int **, float **, LU_space_t);
void copy_mem_float (int, void *, void *);
int sLUWorkInit (int, int, int, int **, double **, LU_space_t);
void copy_mem_double (int, void *, void *);
void sStackCompress (GlobalLU_t *);
void sSetupSpace (void *, int, LU_space_t *);
void *suser_malloc (int, int);
@ -59,7 +59,7 @@ static int no_expand;
#define NotDoubleAlign(addr) ( (intptr_t)addr & 7 )
#define DoubleAlign(addr) ( ((intptr_t)addr + 7) & ~7L )
#define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
(w + 1) * m * sizeof(float) )
(w + 1) * m * sizeof(double) )
#define Reduce(alpha) ((alpha + 1) / 2) /* i.e. (alpha-1)/2 + 1 */
@ -119,9 +119,9 @@ void suser_free(int bytes, int which_end)
/*
* mem_usage consists of the following fields:
* - for_lu (float)
* - for_lu (double)
* The amount of space used in bytes for the L\U data structures.
* - total_needed (float)
* - total_needed (double)
* The amount of space needed in bytes to perform factorization.
* - expansions (int)
* Number of memory expansions during the LU factorization.
@ -136,17 +136,17 @@ int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
Ustore = U->Store;
n = L->ncol;
iword = sizeof(int);
dword = sizeof(float);
dword = sizeof(double);
/* For LU factors */
mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
mem_usage->for_lu = (double)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
dword + Lstore->rowind_colptr[n] * iword );
mem_usage->for_lu += (float)( (n + 1) * iword +
mem_usage->for_lu += (double)( (n + 1) * iword +
Ustore->colptr[n] * (dword + iword) );
/* Working storage to support factorization */
mem_usage->total_needed = mem_usage->for_lu +
(float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
(double)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
(panel_size + 1) * n * dword );
mem_usage->expansions = --no_expand;
@ -165,16 +165,16 @@ int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
int
sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
int **iwork, float **dwork)
int **iwork, double **dwork)
{
int info, iword, dword;
SCformat *Lstore;
NCformat *Ustore;
int *xsup, *supno;
int *lsub, *xlsub;
float *lusup;
double *lusup;
int *xlusup;
float *ucol;
double *ucol;
int *usub, *xusub;
int nzlmax, nzumax, nzlumax;
int FILL = sp_ienv(6);
@ -182,7 +182,7 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
Glu->n = n;
no_expand = 0;
iword = sizeof(int);
dword = sizeof(float);
dword = sizeof(double);
if ( !expanders )
expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
@ -220,8 +220,8 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
xusub = (int *)suser_malloc((n+1) * iword, HEAD);
}
lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
ucol = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
lusup = (double *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
ucol = (double *) sexpand( &nzumax, UCOL, 0, 0, Glu );
lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu );
usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu );
@ -241,8 +241,8 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
printf("Not enough memory to perform factorization.\n");
return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
}
lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
ucol = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
lusup = (double *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
ucol = (double *) sexpand( &nzumax, UCOL, 0, 0, Glu );
lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu );
usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu );
}
@ -307,16 +307,16 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
returns the number of bytes allocated so far when failure occurred. */
int
sLUWorkInit(int m, int n, int panel_size, int **iworkptr,
float **dworkptr, LU_space_t MemModel)
double **dworkptr, LU_space_t MemModel)
{
int isize, dsize, extra;
float *old_ptr;
double *old_ptr;
int maxsuper = sp_ienv(3),
rowblk = sp_ienv(4);
isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
dsize = (m * panel_size +
NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(float);
NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(double);
if ( MemModel == SYSTEM )
*iworkptr = (int *) intCalloc(isize/sizeof(int));
@ -328,13 +328,13 @@ sLUWorkInit(int m, int n, int panel_size, int **iworkptr,
}
if ( MemModel == SYSTEM )
*dworkptr = (float *) SUPERLU_MALLOC(dsize);
*dworkptr = (double *) SUPERLU_MALLOC(dsize);
else {
*dworkptr = (float *) suser_malloc(dsize, TAIL);
*dworkptr = (double *) suser_malloc(dsize, TAIL);
if ( NotDoubleAlign(*dworkptr) ) {
old_ptr = *dworkptr;
*dworkptr = (float*) DoubleAlign(*dworkptr);
*dworkptr = (float*) ((double*)*dworkptr - 1);
*dworkptr = (double*) DoubleAlign(*dworkptr);
*dworkptr = (double*) ((double*)*dworkptr - 1);
extra = (char*)old_ptr - (char*)*dworkptr;
#ifdef DEBUG
printf("sLUWorkInit: not aligned, extra %d\n", extra);
@ -356,10 +356,10 @@ sLUWorkInit(int m, int n, int panel_size, int **iworkptr,
* Set up pointers for real working arrays.
*/
void
sSetRWork(int m, int panel_size, float *dworkptr,
float **dense, float **tempv)
sSetRWork(int m, int panel_size, double *dworkptr,
double **dense, double **tempv)
{
float zero = 0.0;
double zero = 0.0;
int maxsuper = sp_ienv(3),
rowblk = sp_ienv(4);
@ -372,7 +372,7 @@ sSetRWork(int m, int panel_size, float *dworkptr,
/*
* Free the working storage used by factor routines.
*/
void sLUWorkFree(int *iwork, float *dwork, GlobalLU_t *Glu)
void sLUWorkFree(int *iwork, double *dwork, GlobalLU_t *Glu)
{
if ( Glu->MemModel == SYSTEM ) {
SUPERLU_FREE (iwork);
@ -421,11 +421,11 @@ sLUMemXpand(int jcol,
switch ( mem_type ) {
case LUSUP:
Glu->lusup = (float *) new_mem;
Glu->lusup = (double *) new_mem;
Glu->nzlumax = *maxlen;
break;
case UCOL:
Glu->ucol = (float *) new_mem;
Glu->ucol = (double *) new_mem;
Glu->nzumax = *maxlen;
break;
case LSUB:
@ -445,11 +445,11 @@ sLUMemXpand(int jcol,
void
copy_mem_float(int howmany, void *old, void *new)
copy_mem_double(int howmany, void *old, void *new)
{
register int i;
float *dold = old;
float *dnew = new;
double *dold = old;
double *dnew = new;
for (i = 0; i < howmany; i++) dnew[i] = dold[i];
}
@ -466,8 +466,8 @@ void
GlobalLU_t *Glu /* modified - global LU data structures */
)
{
float EXPAND = 1.5;
float alpha;
double EXPAND = 1.5;
double alpha;
void *new_mem, *old_mem;
int new_len, tries, lword, extra, bytes_to_copy;
@ -480,7 +480,7 @@ void
}
if ( type == LSUB || type == USUB ) lword = sizeof(int);
else lword = sizeof(float);
else lword = sizeof(double);
if ( Glu->MemModel == SYSTEM ) {
new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
@ -501,7 +501,7 @@ void
if ( type == LSUB || type == USUB ) {
copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
} else {
copy_mem_float(len_to_copy, expanders[type].mem, new_mem);
copy_mem_double(len_to_copy, expanders[type].mem, new_mem);
}
SUPERLU_FREE (expanders[type].mem);
}
@ -585,12 +585,12 @@ sStackCompress(GlobalLU_t *Glu)
register int iword, dword, ndim;
char *last, *fragment;
int *ifrom, *ito;
float *dfrom, *dto;
double *dfrom, *dto;
int *xlsub, *lsub, *xusub, *usub, *xlusup;
float *ucol, *lusup;
double *ucol, *lusup;
iword = sizeof(int);
dword = sizeof(float);
dword = sizeof(double);
ndim = Glu->n;
xlsub = Glu->xlsub;
@ -602,8 +602,8 @@ sStackCompress(GlobalLU_t *Glu)
lusup = Glu->lusup;
dfrom = ucol;
dto = (float *)((char*)lusup + xlusup[ndim] * dword);
copy_mem_float(xusub[ndim], dfrom, dto);
dto = (double *)((char*)lusup + xlusup[ndim] * dword);
copy_mem_double(xusub[ndim], dfrom, dto);
ucol = dto;
ifrom = lsub;
@ -637,32 +637,32 @@ sStackCompress(GlobalLU_t *Glu)
* Allocate storage for original matrix A
*/
void
sallocateA(int n, int nnz, float **a, int **asub, int **xa)
sallocateA(int n, int nnz, double **a, int **asub, int **xa)
{
*a = (float *) floatMalloc(nnz);
*a = (double *) doubleMalloc(nnz);
*asub = (int *) intMalloc(nnz);
*xa = (int *) intMalloc(n+1);
}
float *floatMalloc(int n)
double *doubleMalloc(int n)
{
float *buf;
buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
double *buf;
buf = (double *) SUPERLU_MALLOC(n * sizeof(double));
if ( !buf ) {
ABORT("SUPERLU_MALLOC failed for buf in floatMalloc()\n");
ABORT("SUPERLU_MALLOC failed for buf in doubleMalloc()\n");
}
return (buf);
}
float *floatCalloc(int n)
double *doubleCalloc(int n)
{
float *buf;
double *buf;
register int i;
float zero = 0.0;
buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
double zero = 0.0;
buf = (double *) SUPERLU_MALLOC(n * sizeof(double));
if ( !buf ) {
ABORT("SUPERLU_MALLOC failed for buf in floatCalloc()\n");
ABORT("SUPERLU_MALLOC failed for buf in doubleCalloc()\n");
}
for (i = 0; i < n; ++i) buf[i] = zero;
return (buf);
@ -675,7 +675,7 @@ int smemory_usage(const int nzlmax, const int nzumax,
register int iword, dword;
iword = sizeof(int);
dword = sizeof(float);
dword = sizeof(double);
return (10 * n * iword +
nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);

View File

@ -25,17 +25,17 @@
*/
/* local prototypes*/
void slsolve ( int, int, float *, float *);
void susolve ( int, int, float *, float *);
void smatvec ( int, int, int, float *, float *, float *);
void slsolve ( int, int, double *, double *);
void susolve ( int, int, double *, double *);
void smatvec ( int, int, int, double *, double *, double *);
void slsolve ( int ldm, int ncol, float *M, float *rhs )
void slsolve ( int ldm, int ncol, double *M, double *rhs )
{
int k;
float x0, x1, x2, x3, x4, x5, x6, x7;
float *M0;
register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
double x0, x1, x2, x3, x4, x5, x6, x7;
double *M0;
register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
register int firstcol = 0;
M0 = &M[0];
@ -131,10 +131,10 @@ void
susolve ( ldm, ncol, M, rhs )
int ldm; /* in */
int ncol; /* in */
float *M; /* in */
float *rhs; /* modified */
double *M; /* in */
double *rhs; /* modified */
{
float xj;
double xj;
int jcol, j, irow;
jcol = ncol - 1;
@ -162,14 +162,14 @@ void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
int ldm; /* in -- leading dimension of M */
int nrow; /* in */
int ncol; /* in */
float *M; /* in */
float *vec; /* in */
float *Mxvec; /* in/out */
double *M; /* in */
double *vec; /* in */
double *Mxvec; /* in/out */
{
float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
float *M0;
register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
double vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
double *M0;
register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
register int firstcol = 0;
int k;

View File

@ -29,8 +29,8 @@
/*
* Function prototypes
*/
void slsolve(int, int, float *, float *);
void smatvec(int, int, int, float *, float *, float *);
void slsolve(int, int, double *, double *);
void smatvec(int, int, int, double *, double *, double *);
extern void scheck_tempv();
void
@ -39,8 +39,8 @@ spanel_bmod (
const int w, /* in */
const int jcol, /* in */
const int nseg, /* in */
float *dense, /* out, of size n by w */
float *tempv, /* working array */
double *dense, /* out, of size n by w */
double *tempv, /* working array */
int *segrep, /* in */
int *repfnz, /* in, of size n by w */
GlobalLU_t *Glu, /* modified */
@ -71,13 +71,13 @@ spanel_bmod (
ftcs3 = _cptofcd("U", strlen("U"));
#endif
int incx = 1, incy = 1;
float alpha, beta;
double alpha, beta;
#endif
register int k, ksub;
int fsupc, nsupc, nsupr, nrow;
int krep, krep_ind;
float ukj, ukj1, ukj2;
double ukj, ukj1, ukj2;
int luptr, luptr1, luptr2;
int segsze;
int block_nrow; /* no of rows in a block row */
@ -87,13 +87,13 @@ spanel_bmod (
register int jj; /* Index through each column in the panel */
int *xsup, *supno;
int *lsub, *xlsub;
float *lusup;
double *lusup;
int *xlusup;
int *repfnz_col; /* repfnz[] for a column in the panel */
float *dense_col; /* dense[] for a column in the panel */
float *tempv1; /* Used in 1-D update */
float *TriTmp, *MatvecTmp; /* used in 2-D update */
float zero = 0.0;
double *dense_col; /* dense[] for a column in the panel */
double *tempv1; /* Used in 1-D update */
double *TriTmp, *MatvecTmp; /* used in 2-D update */
double zero = 0.0;
register int ldaTmp;
register int r_ind, r_hi;
static int first = 1, maxsuper, rowblk, colblk;

View File

@ -34,7 +34,7 @@ spanel_dfs (
SuperMatrix *A, /* in - original matrix */
int *perm_r, /* in */
int *nseg, /* out */
float *dense, /* out */
double *dense, /* out */
int *panel_lsub, /* out */
int *segrep, /* out */
int *repfnz, /* out */
@ -74,7 +74,7 @@ spanel_dfs (
*
*/
NCPformat *Astore;
float *a;
double *a;
int *asub;
int *xa_begin, *xa_end;
int krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
@ -84,7 +84,7 @@ spanel_dfs (
int *marker1; /* marker1[jj] >= jcol if vertex jj was visited
by a previous column within this panel. */
int *repfnz_col; /* start of each column in the panel */
float *dense_col; /* start of each column in the panel */
double *dense_col; /* start of each column in the panel */
int nextl_col; /* next available position in panel_lsub[*,jj] */
int *xsup, *supno;
int *lsub, *xlsub;

View File

@ -31,7 +31,7 @@
int
spivotL(
const int jcol, /* in */
const float u, /* in - diagonal pivoting threshold */
const double u, /* in - diagonal pivoting threshold */
int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */
int *perm_r, /* may be modified */
int *iperm_r, /* in - inverse of perm_r */
@ -67,14 +67,14 @@ spivotL(
int nsupr; /* no of rows in the supernode */
int lptr; /* points to the starting subscript of the supernode */
int pivptr, old_pivptr, diag, diagind;
float pivmax, rtemp, thresh;
float temp;
float *lu_sup_ptr;
float *lu_col_ptr;
double pivmax, rtemp, thresh;
double temp;
double *lu_sup_ptr;
double *lu_col_ptr;
int *lsub_ptr;
int isub, icol, k, itemp;
int *lsub, *xlsub;
float *lusup;
double *lusup;
int *xlusup;
flops_t *ops = stat->ops;

View File

@ -45,13 +45,13 @@ spruneL(
* contains the current pivot row "pivrow"
*
*/
float utemp;
double utemp;
int jsupno, irep, irep1, kmin, kmax, krow, movnum;
int i, ktemp, minloc, maxloc;
int do_prune; /* logical variable */
int *xsup, *supno;
int *lsub, *xlsub;
float *lusup;
double *lusup;
int *xlusup;
xsup = Glu->xsup;

View File

@ -24,8 +24,8 @@
#include "ssp_defs.h"
void slsolve(int, int, float*, float*);
void smatvec(int, int, int, float*, float*, float*);
void slsolve(int, int, double*, double*);
void smatvec(int, int, int, double*, double*, double*);
/*
* Performs numeric block updates within the relaxed snode.
@ -34,8 +34,8 @@ int
ssnode_bmod (
const int jcol, /* in */
const int fsupc, /* in */
float *dense, /* in */
float *tempv, /* working array */
double *dense, /* in */
double *tempv, /* working array */
GlobalLU_t *Glu, /* modified */
SuperLUStat_t *stat /* output */
)
@ -47,14 +47,14 @@ ssnode_bmod (
ftcs3 = _cptofcd("U", strlen("U"));
#endif
int incx = 1, incy = 1;
float alpha = -1.0, beta = 1.0;
double alpha = -1.0, beta = 1.0;
#endif
int luptr, nsupc, nsupr, nrow;
int isub, irow, i, iptr;
register int ufirst, nextlu;
int *lsub, *xlsub;
float *lusup;
double *lusup;
int *xlusup;
flops_t *ops = stat->ops;

View File

@ -19,14 +19,14 @@
/*
* Function prototypes
*/
void susolve(int, int, float*, float*);
void slsolve(int, int, float*, float*);
void smatvec(int, int, int, float*, float*, float*);
int strsv_(char*, char*, char*, int*, float*, int*, float*, int*);
void susolve(int, int, double*, double*);
void slsolve(int, int, double*, double*);
void smatvec(int, int, int, double*, double*, double*);
int strsv_(char*, char*, char*, int*, double*, int*, double*, int*);
int
sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info)
{
/*
* Purpose
@ -71,7 +71,7 @@ sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
* The factor U from the factorization Pr*A*Pc=L*U.
* U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
*
* x - (input/output) float*
* x - (input/output) double*
* Before entry, the incremented array X must contain the n
* element right-hand side vector b. On exit, X is overwritten
* with the solution vector x.
@ -87,12 +87,12 @@ sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
#endif
SCformat *Lstore;
NCformat *Ustore;
float *Lval, *Uval;
double *Lval, *Uval;
int incx = 1;
int nrow;
int fsupc, nsupr, nsupc, luptr, istart, irow;
int i, k, iptr, jcol;
float *work;
double *work;
flops_t solve_ops;
/* Test the input parameters */
@ -115,7 +115,7 @@ sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
Uval = Ustore->nzval;
solve_ops = 0;
if ( !(work = floatCalloc(L->nrow)) )
if ( !(work = doubleCalloc(L->nrow)) )
ABORT("Malloc fails for work in sp_strsv().");
if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
@ -306,8 +306,8 @@ sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
int
sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
int incx, float beta, float *y, int incy)
sp_sgemv(char *trans, double alpha, SuperMatrix *A, double *x,
int incx, double beta, double *y, int incy)
{
/* Purpose
=======
@ -327,7 +327,7 @@ sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
ALPHA - (input) float
ALPHA - (input) double
On entry, ALPHA specifies the scalar alpha.
A - (input) SuperMatrix*
@ -336,7 +336,7 @@ sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
Stype = NC or NCP; Dtype = SLU_S; Mtype = GE.
In the future, more general A can be handled.
X - (input) float*, array of DIMENSION at least
X - (input) double*, array of DIMENSION at least
( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
and at least
( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
@ -347,11 +347,11 @@ sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
On entry, INCX specifies the increment for the elements of
X. INCX must not be zero.
BETA - (input) float
BETA - (input) double
On entry, BETA specifies the scalar beta. When BETA is
supplied as zero then Y need not be set on input.
Y - (output) float*, array of DIMENSION at least
Y - (output) double*, array of DIMENSION at least
( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
and at least
( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
@ -368,9 +368,9 @@ sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
/* Local variables */
NCformat *Astore;
float *Aval;
double *Aval;
int info;
float temp;
double temp;
int lenx, leny, i, j, irow;
int iy, jx, jy, kx, ky;
int notran;

View File

@ -20,8 +20,8 @@
int
sp_sgemm(char *transa, int n,
float alpha, SuperMatrix *A, float *b, int ldb,
float beta, float *c, int ldc)
double alpha, SuperMatrix *A, double *b, int ldb,
double beta, double *c, int ldc)
{
/* Purpose
=======
@ -74,7 +74,7 @@ sp_sgemm(char *transa, int n,
be at least zero.
Unchanged on exit.
ALPHA - (input) float
ALPHA - (input) double
On entry, ALPHA specifies the scalar alpha.
A - (input) SuperMatrix*
@ -96,7 +96,7 @@ sp_sgemm(char *transa, int n,
in the calling (sub) program. LDB must be at least max( 1, n ).
Unchanged on exit.
BETA - (input) float
BETA - (input) double
On entry, BETA specifies the scalar beta. When BETA is
supplied as zero then C need not be set on input.

View File

@ -88,9 +88,9 @@ typedef struct {
int *supno;
int *lsub; /* compressed L subscripts */
int *xlsub;
float *lusup; /* L supernodes */
double *lusup; /* L supernodes */
int *xlusup;
float *ucol; /* U columns */
double *ucol; /* U columns */
int *usub;
int *xusub;
int nzlmax; /* current max size of lsub */
@ -101,8 +101,8 @@ typedef struct {
} GlobalLU_t;
typedef struct {
float for_lu;
float total_needed;
double for_lu;
double total_needed;
int expansions;
} mem_usage_t;
@ -116,61 +116,61 @@ sgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
extern void
sgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
char *, float *, float *, SuperMatrix *, SuperMatrix *,
char *, double *, double *, SuperMatrix *, SuperMatrix *,
void *, int, SuperMatrix *, SuperMatrix *,
float *, float *, float *, float *,
double *, double *, double *, double *,
mem_usage_t *, SuperLUStat_t *, int *);
/* Supernodal LU factor related */
extern void
sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, float *,
sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, double *,
int *, int *, Stype_t, Dtype_t, Mtype_t);
extern void
sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, float *,
sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, double *,
int *, int *, Stype_t, Dtype_t, Mtype_t);
extern void
sCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
extern void
sCreate_Dense_Matrix(SuperMatrix *, int, int, float *, int,
sCreate_Dense_Matrix(SuperMatrix *, int, int, double *, int,
Stype_t, Dtype_t, Mtype_t);
extern void
sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, float *,
sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, double *,
int *, int *, int *, int *, int *,
Stype_t, Dtype_t, Mtype_t);
extern void
sCopy_Dense_Matrix(int, int, float *, int, float *, int);
sCopy_Dense_Matrix(int, int, double *, int, double *, int);
extern void countnz (const int, int *, int *, int *, GlobalLU_t *);
extern void fixupL (const int, const int *, GlobalLU_t *);
extern void sallocateA (int, int, float **, int **, int **);
extern void sallocateA (int, int, double **, int **, int **);
extern void sgstrf (superlu_options_t*, SuperMatrix*,
int, int, int*, void *, int, int *, int *,
SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
extern int ssnode_dfs (const int, const int, const int *, const int *,
const int *, int *, int *, GlobalLU_t *);
extern int ssnode_bmod (const int, const int, float *,
float *, GlobalLU_t *, SuperLUStat_t*);
extern int ssnode_bmod (const int, const int, double *,
double *, GlobalLU_t *, SuperLUStat_t*);
extern void spanel_dfs (const int, const int, const int, SuperMatrix *,
int *, int *, float *, int *, int *, int *,
int *, int *, double *, int *, int *, int *,
int *, int *, int *, int *, GlobalLU_t *);
extern void spanel_bmod (const int, const int, const int, const int,
float *, float *, int *, int *,
double *, double *, int *, int *,
GlobalLU_t *, SuperLUStat_t*);
extern int scolumn_dfs (const int, const int, int *, int *, int *, int *,
int *, int *, int *, int *, int *, GlobalLU_t *);
extern int scolumn_bmod (const int, const int, float *,
float *, int *, int *, int,
extern int scolumn_bmod (const int, const int, double *,
double *, int *, int *, int,
GlobalLU_t *, SuperLUStat_t*);
extern int scopy_to_ucol (int, int, int *, int *, int *,
float *, GlobalLU_t *);
extern int spivotL (const int, const float, int *, int *,
double *, GlobalLU_t *);
extern int spivotL (const int, const double, int *, int *,
int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
extern void spruneL (const int, const int *, const int, const int,
const int *, const int *, int *, GlobalLU_t *);
extern void sreadmt (int *, int *, int *, float **, int **, int **);
extern void sGenXtrue (int, int, float *, int);
extern void sFillRHS (trans_t, int, float *, int, SuperMatrix *,
extern void sreadmt (int *, int *, int *, double **, int **, int **);
extern void sGenXtrue (int, int, double *, int);
extern void sFillRHS (trans_t, int, double *, int, SuperMatrix *,
SuperMatrix *);
extern void sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
SuperMatrix *, SuperLUStat_t*, int *);
@ -178,56 +178,56 @@ extern void sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
/* Driver related */
extern void sgsequ (SuperMatrix *, float *, float *, float *,
float *, float *, int *);
extern void slaqgs (SuperMatrix *, float *, float *, float,
float, float, char *);
extern void sgsequ (SuperMatrix *, double *, double *, double *,
double *, double *, int *);
extern void slaqgs (SuperMatrix *, double *, double *, double,
double, double, char *);
extern void sgscon (char *, SuperMatrix *, SuperMatrix *,
float, float *, SuperLUStat_t*, int *);
extern float sPivotGrowth(int, SuperMatrix *, int *,
double, double *, SuperLUStat_t*, int *);
extern double sPivotGrowth(int, SuperMatrix *, int *,
SuperMatrix *, SuperMatrix *);
extern void sgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
SuperMatrix *, int *, int *, char *, float *,
float *, SuperMatrix *, SuperMatrix *,
float *, float *, SuperLUStat_t*, int *);
SuperMatrix *, int *, int *, char *, double *,
double *, SuperMatrix *, SuperMatrix *,
double *, double *, SuperLUStat_t*, int *);
extern int sp_strsv (char *, char *, char *, SuperMatrix *,
SuperMatrix *, float *, SuperLUStat_t*, int *);
extern int sp_sgemv (char *, float, SuperMatrix *, float *,
int, float, float *, int);
SuperMatrix *, double *, SuperLUStat_t*, int *);
extern int sp_sgemv (char *, double, SuperMatrix *, double *,
int, double, double *, int);
extern int sp_sgemm (char *, int, float,
SuperMatrix *, float *, int, float,
float *, int);
extern int sp_sgemm (char *, int, double,
SuperMatrix *, double *, int, double,
double *, int);
/* Memory-related */
extern int sLUMemInit (fact_t, void *, int, int, int, int, int,
SuperMatrix *, SuperMatrix *,
GlobalLU_t *, int **, float **);
extern void sSetRWork (int, int, float *, float **, float **);
extern void sLUWorkFree (int *, float *, GlobalLU_t *);
GlobalLU_t *, int **, double **);
extern void sSetRWork (int, int, double *, double **, double **);
extern void sLUWorkFree (int *, double *, GlobalLU_t *);
extern int sLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
extern float *floatMalloc(int);
extern float *floatCalloc(int);
extern double *doubleMalloc(int);
extern double *doubleCalloc(int);
extern int smemory_usage(const int, const int, const int, const int);
extern int sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
/* Auxiliary routines */
extern void sreadhb(int *, int *, int *, float **, int **, int **);
extern void sCompRow_to_CompCol(int, int, int, float*, int*, int*,
float **, int **, int **);
extern void sfill (float *, int, float);
extern void sinf_norm_error (int, SuperMatrix *, float *);
extern void sreadhb(int *, int *, int *, double **, int **, int **);
extern void sCompRow_to_CompCol(int, int, int, double*, int*, int*,
double **, int **, int **);
extern void sfill (double *, int, double);
extern void sinf_norm_error (int, SuperMatrix *, double *);
extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
float, float, float *, float *, char *);
double, double, double *, double *, char *);
/* Routines for debugging */
extern void sPrint_CompCol_Matrix(char *, SuperMatrix *);
extern void sPrint_SuperNode_Matrix(char *, SuperMatrix *);
extern void sPrint_Dense_Matrix(char *, SuperMatrix *);
extern void print_lu_col(char *, int, int, int *, GlobalLU_t *);
extern void check_tempv(int, float *);
extern void check_tempv(int, double *);
extern int print_int_vec(char *what, int n, int *vec);
extern int sp_symetree(int *acolst, int *acolend, int *arow, int n, int *parent);

View File

@ -1,17 +1,17 @@
/** \file opennl/superlu/strsv.c
* \ingroup opennl
*/
int strsv_(char *, char *, char *, int *, float *, int *, float *, int *);
int strsv_(char *, char *, char *, int *, double *, int *, double *, int *);
/* Subroutine */ int strsv_(char *uplo, char *trans, char *diag, int *n,
float *a, int *lda, float *x, int *incx)
double *a, int *lda, double *x, int *incx)
{
/* Local variables */
static int info;
static float temp;
static double temp;
static int i, j;
extern int lsame_(char *, char *);
static int ix, jx, kx;

View File

@ -27,15 +27,15 @@
/* prototypes */
void sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu);
void scheck_tempv(int n, float *tempv);
void sPrintPerf(SuperMatrix *, SuperMatrix *, mem_usage_t *,float , float ,
float *, float *, char *, SuperLUStat_t *);
int print_float_vec(char *what, int n, float *vec);
void scheck_tempv(int n, double *tempv);
void sPrintPerf(SuperMatrix *, SuperMatrix *, mem_usage_t *,double , double ,
double *, double *, char *, SuperLUStat_t *);
int print_double_vec(char *what, int n, double *vec);
/* ********** */
void
sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
float *nzval, int *rowind, int *colptr,
double *nzval, int *rowind, int *colptr,
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
{
NCformat *Astore;
@ -56,7 +56,7 @@ sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
void
sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz,
float *nzval, int *colind, int *rowptr,
double *nzval, int *colind, int *rowptr,
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
{
NRformat *Astore;
@ -91,14 +91,14 @@ sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
Bstore = (NCformat *) B->Store;
Bstore->nnz = nnz = Astore->nnz;
for (i = 0; i < nnz; ++i)
((float *)Bstore->nzval)[i] = ((float *)Astore->nzval)[i];
((double *)Bstore->nzval)[i] = ((double *)Astore->nzval)[i];
for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
}
void
sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx,
sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, double *x, int ldx,
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
{
DNformat *Xstore;
@ -112,12 +112,12 @@ sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx,
if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
Xstore = (DNformat *) X->Store;
Xstore->lda = ldx;
Xstore->nzval = (float *) x;
Xstore->nzval = (double *) x;
}
void
sCopy_Dense_Matrix(int M, int N, float *X, int ldx,
float *Y, int ldy)
sCopy_Dense_Matrix(int M, int N, double *X, int ldx,
double *Y, int ldy)
{
/*
*
@ -135,7 +135,7 @@ sCopy_Dense_Matrix(int M, int N, float *X, int ldx,
void
sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
float *nzval, int *nzval_colptr, int *rowind,
double *nzval, int *nzval_colptr, int *rowind,
int *rowind_colptr, int *col_to_sup, int *sup_to_col,
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
{
@ -166,14 +166,14 @@ sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
*/
void
sCompRow_to_CompCol(int m, int n, int nnz,
float *a, int *colind, int *rowptr,
float **at, int **rowind, int **colptr)
double *a, int *colind, int *rowptr,
double **at, int **rowind, int **colptr)
{
register int i, j, col, relpos;
int *marker;
/* Allocate storage for another copy of the matrix. */
*at = (float *) floatMalloc(nnz);
*at = (double *) doubleMalloc(nnz);
*rowind = (int *) intMalloc(nnz);
*colptr = (int *) intMalloc(n+1);
marker = (int *) intCalloc(n);
@ -207,13 +207,13 @@ sPrint_CompCol_Matrix(char *what, SuperMatrix *A)
{
NCformat *Astore;
register int i,n;
float *dp;
double *dp;
printf("\nCompCol matrix %s:\n", what);
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
n = A->ncol;
Astore = (NCformat *) A->Store;
dp = (float *) Astore->nzval;
dp = (double *) Astore->nzval;
printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
printf("nzval: ");
for (i = 0; i < Astore->colptr[n]; ++i) printf("%f ", dp[i]);
@ -230,14 +230,14 @@ sPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
{
SCformat *Astore;
register int i, j, k, c, d, n, nsup;
float *dp;
double *dp;
int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
printf("\nSuperNode matrix %s:\n", what);
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
n = A->ncol;
Astore = (SCformat *) A->Store;
dp = (float *) Astore->nzval;
dp = (double *) Astore->nzval;
col_to_sup = Astore->col_to_sup;
sup_to_col = Astore->sup_to_col;
rowind_colptr = Astore->rowind_colptr;
@ -279,12 +279,12 @@ sPrint_Dense_Matrix(char *what, SuperMatrix *A)
{
DNformat *Astore;
register int i;
float *dp;
double *dp;
printf("\nDense matrix %s:\n", what);
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
Astore = (DNformat *) A->Store;
dp = (float *) Astore->nzval;
dp = (double *) Astore->nzval;
printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
printf("\nnzval: ");
for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]);
@ -301,9 +301,9 @@ sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
int i, k, fsupc;
int *xsup, *supno;
int *xlsub, *lsub;
float *lusup;
double *lusup;
int *xlusup;
float *ucol;
double *ucol;
int *usub, *xusub;
xsup = Glu->xsup;
@ -339,7 +339,7 @@ sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
* Check whether tempv[] == 0. This should be true before and after
* calling any numeric routines, i.e., "panel_bmod" and "column_bmod".
*/
void scheck_tempv(int n, float *tempv)
void scheck_tempv(int n, double *tempv)
{
int i;
@ -354,12 +354,12 @@ void scheck_tempv(int n, float *tempv)
void
sGenXtrue(int n, int nrhs, float *x, int ldx)
sGenXtrue(int n, int nrhs, double *x, int ldx)
{
int i, j;
for (j = 0; j < nrhs; ++j)
for (i = 0; i < n; ++i) {
x[i + j*ldx] = 1.0;/* + (float)(i+1.)/n;*/
x[i + j*ldx] = 1.0;/* + (double)(i+1.)/n;*/
}
}
@ -367,13 +367,13 @@ sGenXtrue(int n, int nrhs, float *x, int ldx)
* Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
*/
void
sFillRHS(trans_t trans, int nrhs, float *x, int ldx,
sFillRHS(trans_t trans, int nrhs, double *x, int ldx,
SuperMatrix *A, SuperMatrix *B)
{
DNformat *Bstore;
float *rhs;
float one = 1.0;
float zero = 0.0;
double *rhs;
double one = 1.0;
double zero = 0.0;
int ldc;
char transc[1];
@ -390,10 +390,10 @@ sFillRHS(trans_t trans, int nrhs, float *x, int ldx,
}
/*
* Fills a float precision array with a given value.
* Fills a double precision array with a given value.
*/
void
sfill(float *a, int alen, float dval)
sfill(double *a, int alen, double dval)
{
register int i;
for (i = 0; i < alen; i++) a[i] = dval;
@ -404,11 +404,11 @@ sfill(float *a, int alen, float dval)
/*
* Check the inf-norm of the error vector
*/
void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
void sinf_norm_error(int nrhs, SuperMatrix *X, double *xtrue)
{
DNformat *Xstore;
float err, xnorm;
float *Xmat, *soln_work;
double err, xnorm;
double *Xmat, *soln_work;
int i, j;
Xstore = X->Store;
@ -431,8 +431,8 @@ void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
/* Print performance of the code. */
void
sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
float rpg, float rcond, float *ferr,
float *berr, char *equed, SuperLUStat_t *stat)
double rpg, double rcond, double *ferr,
double *berr, char *equed, SuperLUStat_t *stat)
{
SCformat *Lstore;
NCformat *Ustore;
@ -475,7 +475,7 @@ sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
int print_float_vec(char *what, int n, float *vec)
int print_double_vec(char *what, int n, double *vec)
{
int i;
printf("%s: n %d\n", what, n);

View File

@ -28,8 +28,8 @@
/* prototypes */
flops_t LUFactFlops(SuperLUStat_t *stat);
flops_t LUSolveFlops(SuperLUStat_t *stat);
float SpaSize(int n, int np, float sum_npw);
float DenseSize(int n, float sum_nw);
double SpaSize(int n, int np, double sum_npw);
double DenseSize(int n, double sum_nw);
/*
* Global statistics variale
@ -332,27 +332,27 @@ void super_stats(int nsuper, int *xsup)
for (i = 0; i <= nsuper; i++) {
isize = xsup[i+1] - xsup[i];
whichb = (float) isize / max_sup_size * NBUCKS;
whichb = (double) isize / max_sup_size * NBUCKS;
if (whichb >= NBUCKS) whichb = NBUCKS - 1;
bucket[whichb]++;
}
printf("\tHistogram of supernode sizes:\n");
for (i = 0; i < NBUCKS; i++) {
bl = (float) i * max_sup_size / NBUCKS;
bh = (float) (i+1) * max_sup_size / NBUCKS;
bl = (double) i * max_sup_size / NBUCKS;
bh = (double) (i+1) * max_sup_size / NBUCKS;
printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]);
}
}
float SpaSize(int n, int np, float sum_npw)
double SpaSize(int n, int np, double sum_npw)
{
return (sum_npw*8 + np*8 + n*4)/1024.;
}
float DenseSize(int n, float sum_nw)
double DenseSize(int n, double sum_nw)
{
return (sum_nw*8 + n*8)/1024.;;
}

View File

@ -97,7 +97,7 @@ typedef enum {
RCOND, /* estimate reciprocal condition number */
SOLVE, /* forward and back solves */
REFINE, /* perform iterative refinement */
SLU_FLOAT, /* time spent in floating-point operations */
SLU_FLOAT, /* time spent in doubleing-point operations */
TRSV, /* fraction of FACT spent in xTRSV */
GEMV, /* fraction of FACT spent in xGEMV */
FERR, /* estimate error bounds after iterative refinement */
@ -108,7 +108,7 @@ typedef enum {
/***********************************************************************
* Type definitions
***********************************************************************/
typedef float flops_t;
typedef double flops_t;
typedef unsigned char Logical;
/*