OpenNL: modify SuperLU to use doubles rather than floats, for better precision.

This helps to improve the accuracy of UV unwrapping and laplacian deform for high poly meshes, which could get warped quite badly. It's not much slower, doubles are pretty fast on modern CPUs, but it does double memory usage. This seems acceptable as otherwise high poly meshes would not work correctly anyway. Fixes T39004.
Referenced by issue #39004, Laplacian Deform incorrect with > 10K faces
2014-09-20 19:51:34 +02:00 · 2014-09-20 19:51:34 +02:00 · 0b12e61040 · 2023-02-14 11:03:49 +01:00
parent 87c27ef92f
commit 0b12e61040
21 changed files with 311 additions and 315 deletions
--- a/intern/opennl/extern/ONL_opennl.h
+++ b/intern/opennl/extern/ONL_opennl.h
@ -100,11 +100,11 @@ NLContext nlGetCurrent(void);

 /* State get/set */

-void nlSolverParameterf(NLenum pname, NLfloat param);
+void nlSolverParameterf(NLenum pname, NLdouble param);
 void nlSolverParameteri(NLenum pname, NLint param);

 void nlGetBooleanv(NLenum pname, NLboolean* params);
-void nlGetFloatv(NLenum pname, NLfloat* params);
+void nlGetFloatv(NLenum pname, NLdouble* params);
 void nlGetIntergerv(NLenum pname, NLint* params);

 void nlEnable(NLenum pname);
@ -113,8 +113,8 @@ NLboolean nlIsEnabled(NLenum pname);

 /* Variables */

-void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value);
-NLfloat nlGetVariable(NLuint rhsindex, NLuint index);
+void nlSetVariable(NLuint rhsindex, NLuint index, NLdouble value);
+NLdouble nlGetVariable(NLuint rhsindex, NLuint index);
 void nlLockVariable(NLuint index);
 void nlUnlockVariable(NLuint index);
 NLboolean nlVariableIsLocked(NLuint index);
@ -126,13 +126,9 @@ void nlEnd(NLenum primitive);

 /* Setting elements in matrix/vector */

-void nlMatrixAdd(NLuint row, NLuint col, NLfloat value);
-void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value);
-void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLfloat value);
-
-/* Multiply */
-
-void nlMatrixMultiply(NLfloat *x, NLfloat *y);
+void nlMatrixAdd(NLuint row, NLuint col, NLdouble value);
+void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLdouble value);
+void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLdouble value);

 /* Solve */

--- a/intern/opennl/intern/opennl.c
+++ b/intern/opennl/intern/opennl.c
@ -69,7 +69,7 @@ static void __nl_assertion_failed(char* cond, char* file, int line) {
 }

 static void __nl_range_assertion_failed(
-	float x, float min_val, float max_val, char* file, int line
+	double x, double min_val, double max_val, char* file, int line
 ) {
 	fprintf(
 		stderr, 
@ -151,7 +151,7 @@ static void __nl_should_not_have_reached(char* file, int line) {

 typedef struct {
 	NLuint   index;
-	NLfloat value;
+	NLdouble value;
 } __NLCoeff;

 typedef struct {
@ -183,7 +183,7 @@ static void __nlRowColumnGrow(__NLRowColumn* c) {
 	}
 }

-static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
+static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLdouble value) {
 	NLuint i;
 	for(i=0; i<c->size; i++) {
 		if(c->coeff[i].index == (NLuint)index) {
@ -200,7 +200,7 @@ static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
 }

 /* Does not check whether the index already exists */
-static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) {
+static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLdouble value) {
 	if(c->size == c->capacity) {
 		__nlRowColumnGrow(c);
 	}
@ -229,7 +229,7 @@ typedef struct {
 	NLenum storage;
 	__NLRowColumn* row;
 	__NLRowColumn* column;
-	NLfloat*	  diag;
+	NLdouble*	  diag;
 } __NLSparseMatrix;


@ -259,7 +259,7 @@ static void __nlSparseMatrixConstruct(
 	}

 	M->diag_size = MIN(m,n);
-	M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size);
+	M->diag = __NL_NEW_ARRAY(NLdouble, M->diag_size);
 }

 static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
@ -283,7 +283,7 @@ static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
 }

 static void __nlSparseMatrixAdd(
-	__NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value
+	__NLSparseMatrix* M, NLuint i, NLuint j, NLdouble value
 ) {
 	__nl_parano_range_assert(i, 0, M->m - 1);
 	__nl_parano_range_assert(j, 0, M->n - 1);
@ -313,7 +313,7 @@ static void __nlSparseMatrixClear( __NLSparseMatrix* M) {
 			__nlRowColumnClear(&(M->column[i]));
 		}
 	}
-	__NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size);	
+	__NL_CLEAR_ARRAY(NLdouble, M->diag, M->diag_size);	
 }

 /* Returns the number of non-zero coefficients */
@ -338,7 +338,7 @@ static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) {
 /* SparseMatrix x Vector routines, internal helper routines */

 static void __nlSparseMatrix_mult_rows_symmetric(
-	__NLSparseMatrix* A, NLfloat* x, NLfloat* y
+	__NLSparseMatrix* A, NLdouble* x, NLdouble* y
 ) {
 	NLuint m = A->m;
 	NLuint i,ij;
@ -358,7 +358,7 @@ static void __nlSparseMatrix_mult_rows_symmetric(
 }

 static void __nlSparseMatrix_mult_rows(
-	__NLSparseMatrix* A, NLfloat* x, NLfloat* y
+	__NLSparseMatrix* A, NLdouble* x, NLdouble* y
 ) {
 	NLuint m = A->m;
 	NLuint i,ij;
@ -375,7 +375,7 @@ static void __nlSparseMatrix_mult_rows(
 }

 static void __nlSparseMatrix_mult_cols_symmetric(
-	__NLSparseMatrix* A, NLfloat* x, NLfloat* y
+	__NLSparseMatrix* A, NLdouble* x, NLdouble* y
 ) {
 	NLuint n = A->n;
 	NLuint j,ii;
@ -395,13 +395,13 @@ static void __nlSparseMatrix_mult_cols_symmetric(
 }

 static void __nlSparseMatrix_mult_cols(
-	__NLSparseMatrix* A, NLfloat* x, NLfloat* y
+	__NLSparseMatrix* A, NLdouble* x, NLdouble* y
 ) {
 	NLuint n = A->n;
 	NLuint j,ii; 
 	__NLRowColumn* Cj = NULL;
 	__NLCoeff* c = NULL;
-	__NL_CLEAR_ARRAY(NLfloat, y, A->m);
+	__NL_CLEAR_ARRAY(NLdouble, y, A->m);
 	for(j=0; j<n; j++) {
 		Cj = &(A->column[j]);
 		for(ii=0; ii<Cj->size; ii++) {
@ -414,7 +414,7 @@ static void __nlSparseMatrix_mult_cols(
 /************************************************************************************/
 /* SparseMatrix x Vector routines, main driver routine */

-static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) {
+static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLdouble* x, NLdouble* y) {
 	if(A->storage & __NL_ROWS) {
 		if(A->storage & __NL_SYMMETRIC) {
 			__nlSparseMatrix_mult_rows_symmetric(A, x, y);
@ -440,7 +440,7 @@ static void __nlSparseMatrix_square(
 	NLuint i, j0, j1;
 	__NLRowColumn *Ri = NULL;
 	__NLCoeff *c0 = NULL, *c1 = NULL;
-	float value;
+	double value;

 	__nlSparseMatrixConstruct(AtA, n, n, A->storage);

@ -460,7 +460,7 @@ static void __nlSparseMatrix_square(
 }

 static void __nlSparseMatrix_transpose_mult_rows(
-	__NLSparseMatrix* A, NLfloat* x, NLfloat* y
+	__NLSparseMatrix* A, NLdouble* x, NLdouble* y
 ) {
 	NLuint m = A->m;
 	NLuint n = A->n;
@ -468,7 +468,7 @@ static void __nlSparseMatrix_transpose_mult_rows(
 	__NLRowColumn* Ri = NULL;
 	__NLCoeff* c = NULL;

-	__NL_CLEAR_ARRAY(NLfloat, y, n);
+	__NL_CLEAR_ARRAY(NLdouble, y, n);

 	for(i=0; i<m; i++) {
 		Ri = &(A->row[i]);
@ -482,10 +482,10 @@ static void __nlSparseMatrix_transpose_mult_rows(
 /************************************************************************************/
 /* NLContext data structure */

-typedef void(*__NLMatrixFunc)(float* x, float* y);
+typedef void(*__NLMatrixFunc)(double* x, double* y);

 typedef struct {
-	NLfloat  value[4];
+	NLdouble  value[4];
 	NLboolean locked;
 	NLuint	index;
 	__NLRowColumn *a;
@ -503,11 +503,11 @@ typedef struct {
 	NLuint			n;
 	NLuint			m;
 	__NLVariable*	variable;
-	NLfloat*		b;
-	NLfloat*		Mtb;
+	NLdouble*		b;
+	NLdouble*		Mtb;
 	__NLSparseMatrix M;
 	__NLSparseMatrix MtM;
-	NLfloat*		x;
+	NLdouble*		x;
 	NLuint			nb_variables;
 	NLuint			nb_rows;
 	NLboolean		least_squares;
@ -520,7 +520,7 @@ typedef struct {
 	NLboolean		alloc_x;
 	NLboolean		alloc_b;
 	NLboolean		alloc_Mtb;
-	NLfloat			error;
+	NLdouble			error;
 	__NLMatrixFunc	matrix_vector_prod;

 	struct __NLSuperLUContext {
@ -533,7 +533,7 @@ typedef struct {

 static __NLContext* __nlCurrentContext = NULL;

-static void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) {
+static void __nlMatrixVectorProd_default(NLdouble* x, NLdouble* y) {
 	__nlSparseMatrixMult(&(__nlCurrentContext->M), x, y);
 }

@ -611,7 +611,7 @@ static void __nlTransition(NLenum from_state, NLenum to_state) {
 /************************************************************************************/
 /* Get/Set parameters */

-void nlSolverParameterf(NLenum pname, NLfloat param) {
+void nlSolverParameterf(NLenum pname, NLdouble param) {
 	__nlCheckState(__NL_STATE_INITIAL);
 	switch(pname) {
 	case NL_NB_VARIABLES: {
@ -677,22 +677,22 @@ void nlGetBooleanv(NLenum pname, NLboolean* params) {
 	}
 }

-void nlGetFloatv(NLenum pname, NLfloat* params) {
+void nlGetFloatv(NLenum pname, NLdouble* params) {
 	switch(pname) {
 	case NL_NB_VARIABLES: {
-		*params = (NLfloat)(__nlCurrentContext->nb_variables);
+		*params = (NLdouble)(__nlCurrentContext->nb_variables);
 	} break;
 	case NL_NB_ROWS: {
-		*params = (NLfloat)(__nlCurrentContext->nb_rows);
+		*params = (NLdouble)(__nlCurrentContext->nb_rows);
 	} break;
 	case NL_LEAST_SQUARES: {
-		*params = (NLfloat)(__nlCurrentContext->least_squares);
+		*params = (NLdouble)(__nlCurrentContext->least_squares);
 	} break;
 	case NL_SYMMETRIC: {
-		*params = (NLfloat)(__nlCurrentContext->symmetric);
+		*params = (NLdouble)(__nlCurrentContext->symmetric);
 	} break;
 	case NL_ERROR: {
-		*params = (NLfloat)(__nlCurrentContext->error);
+		*params = (NLdouble)(__nlCurrentContext->error);
 	} break;
 	default: {
 		__nl_assert_not_reached;
@ -751,13 +751,13 @@ NLboolean nlIsEnabled(NLenum pname) {
 /************************************************************************************/
 /* Get/Set Lock/Unlock variables */

-void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value) {
+void nlSetVariable(NLuint rhsindex, NLuint index, NLdouble value) {
 	__nlCheckState(__NL_STATE_SYSTEM);
 	__nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
 	__nlCurrentContext->variable[index].value[rhsindex] = value;	
 }

-NLfloat nlGetVariable(NLuint rhsindex, NLuint index) {
+NLdouble nlGetVariable(NLuint rhsindex, NLuint index) {
 	__nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
 	__nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
 	return __nlCurrentContext->variable[index].value[rhsindex];
@ -870,15 +870,15 @@ static void __nlBeginMatrix() {
 		__nlSparseMatrixConstruct(&context->M, m, n, storage);
 		context->alloc_M = NL_TRUE;

-		context->b = __NL_NEW_ARRAY(NLfloat, m*context->nb_rhs);
+		context->b = __NL_NEW_ARRAY(NLdouble, m*context->nb_rhs);
 		context->alloc_b = NL_TRUE;

-		context->x = __NL_NEW_ARRAY(NLfloat, n*context->nb_rhs);
+		context->x = __NL_NEW_ARRAY(NLdouble, n*context->nb_rhs);
 		context->alloc_x = NL_TRUE;
 	}
 	else {
 		/* need to recompute b only, A is not constructed anymore */
-		__NL_CLEAR_ARRAY(NLfloat, context->b, context->m*context->nb_rhs);
+		__NL_CLEAR_ARRAY(NLdouble, context->b, context->m*context->nb_rhs);
 	}

 	__nlVariablesToVector();
@ -888,7 +888,7 @@ static void __nlEndMatrixRHS(NLuint rhs) {
 	__NLContext *context = __nlCurrentContext;
 	__NLVariable *variable;
 	__NLRowColumn *a;
-	NLfloat *b, *Mtb;
+	NLdouble *b, *Mtb;
 	NLuint i, j;

 	b = context->b + context->m*rhs;
@ -922,7 +922,7 @@ static void __nlEndMatrix() {
 			context->alloc_MtM = NL_TRUE;

 			context->Mtb =
-				__NL_NEW_ARRAY(NLfloat, context->n*context->nb_rhs);
+				__NL_NEW_ARRAY(NLdouble, context->n*context->nb_rhs);
 			context->alloc_Mtb = NL_TRUE;
 		}
 	}
@ -931,7 +931,7 @@ static void __nlEndMatrix() {
 		__nlEndMatrixRHS(i);
 }

-void nlMatrixAdd(NLuint row, NLuint col, NLfloat value)
+void nlMatrixAdd(NLuint row, NLuint col, NLdouble value)
 {
 	__NLContext *context = __nlCurrentContext;

@ -960,10 +960,10 @@ void nlMatrixAdd(NLuint row, NLuint col, NLfloat value)
 	}
 }

-void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value)
+void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLdouble value)
 {
 	__NLContext *context = __nlCurrentContext;
-	NLfloat* b = context->b;
+	NLdouble* b = context->b;

 	__nlCheckState(__NL_STATE_MATRIX);

@ -981,10 +981,10 @@ void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value)
 	}
 }

-void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLfloat value)
+void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLdouble value)
 {
 	__NLContext *context = __nlCurrentContext;
-	NLfloat* b = context->b;
+	NLdouble* b = context->b;

 	__nlCheckState(__NL_STATE_MATRIX);

@ -1047,8 +1047,8 @@ static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation)

 	/* Compressed Row Storage matrix representation */
 	NLint	*xa		= __NL_NEW_ARRAY(NLint, n+1);
-	NLfloat	*rhs	= __NL_NEW_ARRAY(NLfloat, n);
-	NLfloat	*a		= __NL_NEW_ARRAY(NLfloat, nnz);
+	NLdouble	*rhs	= __NL_NEW_ARRAY(NLdouble, n);
+	NLdouble	*a		= __NL_NEW_ARRAY(NLdouble, nnz);
 	NLint	*asub	= __NL_NEW_ARRAY(NLint, nnz);
 	NLint	*etree	= __NL_NEW_ARRAY(NLint, n);

@ -1083,7 +1083,7 @@ static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation)
 	sCreate_CompCol_Matrix(
 		&At, n, n, nnz, a, asub, xa, 
 		SLU_NC,		/* Colum wise, no supernode */
-		SLU_S,		/* floats */ 
+		SLU_S,		/* doubles */ 
 		SLU_GE		/* general storage */
 	);

@ -1136,8 +1136,8 @@ static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation)
 static NLboolean __nlInvert_SUPERLU(__NLContext *context) {

 	/* OpenNL Context */
-	NLfloat* b = (context->least_squares)? context->Mtb: context->b;
-	NLfloat* x = context->x;
+	NLdouble* b = (context->least_squares)? context->Mtb: context->b;
+	NLdouble* x = context->x;
 	NLuint n = context->n, j;

 	/* SuperLU variables */
@ -1149,7 +1149,7 @@ static NLboolean __nlInvert_SUPERLU(__NLContext *context) {
 		sCreate_Dense_Matrix(
 			&B, n, 1, b, n, 
 			SLU_DN, /* Fortran-type column-wise storage */
-			SLU_S,  /* floats						  */
+			SLU_S,  /* doubles						  */
 			SLU_GE  /* general						  */
 		);

@ -1184,12 +1184,12 @@ void nlPrintMatrix(void) {
 	__NLContext *context = __nlCurrentContext;
 	__NLSparseMatrix* M  = &(context->M);
 	__NLSparseMatrix* MtM  = &(context->MtM);
-	float *b = context->b;
+	double *b = context->b;
 	NLuint i, jj, k;
 	NLuint m = context->m;
 	NLuint n = context->n;
 	__NLRowColumn* Ri = NULL;
-	float *value = malloc(sizeof(*value)*(n+m));
+	double *value = malloc(sizeof(*value)*(n+m));

 	printf("A:\n");
 	for(i=0; i<m; i++) {
--- a/intern/opennl/superlu/scolumn_bmod.c
+++ b/intern/opennl/superlu/scolumn_bmod.c
@ -29,9 +29,9 @@
 /* 
 * Function prototypes 
 */
-void susolve(int, int, float*, float*);
-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
+void susolve(int, int, double*, double*);
+void slsolve(int, int, double*, double*);
+void smatvec(int, int, int, double*, double*, double*);



@ -42,8 +42,8 @@ int
 scolumn_bmod (
 	     const int  jcol,	  /* in */
 	     const int  nseg,	  /* in */
-	     float     *dense,	  /* in */
-	     float     *tempv,	  /* working array */
+	     double     *dense,	  /* in */
+	     double     *tempv,	  /* working array */
 	     int        *segrep,  /* in */
 	     int        *repfnz,  /* in */
 	     int        fpanelc,  /* in -- first column in the current panel */
@ -67,7 +67,7 @@ scolumn_bmod (

 #ifdef USE_VENDOR_BLAS
    int         incx = 1, incy = 1;
-    float      alpha, beta;
+    double      alpha, beta;
 #endif
    
    /* krep = representative of current k-th supernode
@ -78,7 +78,7 @@ scolumn_bmod (
     * kfnz = first nonz in the k-th supernodal segment
     * no_zeros = no of leading zeros in a supernodal U-segment
     */
-    float       ukj, ukj1, ukj2;
+    double       ukj, ukj1, ukj2;
    int          luptr, luptr1, luptr2;
    int          fsupc, nsupc, nsupr, segsze;
    int          nrow;	  /* No of rows in the matrix of matrix-vector */
@ -91,14 +91,14 @@ scolumn_bmod (
 			     panel and the first column of the current snode. */
    int          *xsup, *supno;
    int          *lsub, *xlsub;
-    float       *lusup;
+    double       *lusup;
    int          *xlusup;
    int          nzlumax;
-    float       *tempv1;
-    float      zero = 0.0;
+    double       *tempv1;
+    double      zero = 0.0;
 #ifdef USE_VENDOR_BLAS
-    float      one = 1.0;
-    float      none = -1.0;
+    double      one = 1.0;
+    double      none = -1.0;
 #endif
    int          mem_error;
    flops_t      *ops = stat->ops;
--- a/intern/opennl/superlu/scopy_to_ucol.c
+++ b/intern/opennl/superlu/scopy_to_ucol.c
@ -33,7 +33,7 @@ scopy_to_ucol(
 	      int        *segrep,  /* in */
 	      int        *repfnz,  /* in */
 	      int        *perm_r,  /* in */
-	      float     *dense,   /* modified - reset to zero on return */
+	      double     *dense,   /* modified - reset to zero on return */
 	      GlobalLU_t *Glu      /* modified */
 	      )
 {
@ -47,11 +47,11 @@ scopy_to_ucol(
    int new_next, mem_error;
    int       *xsup, *supno;
    int       *lsub, *xlsub;
-    float    *ucol;
+    double    *ucol;
    int       *usub, *xusub;
    int       nzumax;

-    float zero = 0.0;
+    double zero = 0.0;

    xsup    = Glu->xsup;
    supno   = Glu->supno;
--- a/intern/opennl/superlu/sgssv.c
+++ b/intern/opennl/superlu/sgssv.c
@ -119,7 +119,7 @@ sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
 *         On exit, the solution matrix if info = 0;
 *
 * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
+ *        Record the statistics on runtime and doubleing-point operation count.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info    (output) int*
--- a/intern/opennl/superlu/sgstrf.c
+++ b/intern/opennl/superlu/sgstrf.c
@ -56,7 +56,7 @@ sgstrf (superlu_options_t *options, SuperMatrix *A,
 *          (A->nrow, A->ncol). The type of A can be:
 *          Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
 *
- * drop_tol (input) float (NOT IMPLEMENTED)
+ * drop_tol (input) double (NOT IMPLEMENTED)
 *	    Drop tolerance parameter. At step j of the Gaussian elimination,
 *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
 *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
@ -119,7 +119,7 @@ sgstrf (superlu_options_t *options, SuperMatrix *A,
 *          Dtype = SLU_S, Mtype = SLU_TRU.
 *
 * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation count.
+ *          Record the statistics on runtime and doubleing-point operation count.
 *          See util.h for the definition of 'SuperLUStat_t'.
 *
 * info     (output) int*
@ -189,14 +189,14 @@ sgstrf (superlu_options_t *options, SuperMatrix *A,
 			   used when options->Fact == SamePattern_SameRowPerm */
    int       *iperm_c; /* inverse of perm_c */
    int       *iwork;
-    float    *swork;
+    double    *swork;
    int	      *segrep, *repfnz, *parent, *xplore;
    int	      *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
    int	      *xprune;
    int	      *marker;
-    float    *dense, *tempv;
+    double    *dense, *tempv;
    int       *relax_end;
-    float    *a;
+    double    *a;
    int       *asub;
    int       *xa_begin, *xa_end;
    int       *xsup, *supno;
--- a/intern/opennl/superlu/sgstrs.c
+++ b/intern/opennl/superlu/sgstrs.c
@ -28,10 +28,10 @@
 /* 
 * Function prototypes 
 */
-void susolve(int, int, float*, float*);
-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
-void sprint_soln(int , float *);
+void susolve(int, int, double*, double*);
+void slsolve(int, int, double*, double*);
+void smatvec(int, int, int, double*, double*, double*);
+void sprint_soln(int , double *);

 void
 sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
@ -82,7 +82,7 @@ sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
 *         On exit, the solution matrix if info = 0;
 *
 * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation count.
+ *          Record the statistics on runtime and doubleing-point operation count.
 *          See util.h for the definition of 'SuperLUStat_t'.
 *
 * info    (output) int*
@ -94,17 +94,17 @@ sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
 #endif
 #ifdef USE_VENDOR_BLAS
-    float   alpha = 1.0, beta = 1.0;
-    float   *work_col;
+    double   alpha = 1.0, beta = 1.0;
+    double   *work_col;
 #endif
    DNformat *Bstore;
-    float   *Bmat;
+    double   *Bmat;
    SCformat *Lstore;
    NCformat *Ustore;
-    float   *Lval, *Uval;
+    double   *Lval, *Uval;
    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    float   *work, *rhs_work, *soln;
+    double   *work, *rhs_work, *soln;
    flops_t  solve_ops;
    void sprint_soln();

@ -130,9 +130,9 @@ sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
    }

    n = L->nrow;
-    work = floatCalloc(n * nrhs);
+    work = doubleCalloc(n * nrhs);
    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = floatMalloc(n);
+    soln = doubleMalloc(n);
    if ( !soln ) ABORT("Malloc fails for local soln[].");

    Bmat = Bstore->nzval;
@ -325,7 +325,7 @@ sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
 * Diagnostic print of the solution vector 
 */
 void
-sprint_soln(int n, float *soln)
+sprint_soln(int n, double *soln)
 {
    int i;

--- a/intern/opennl/superlu/smemory.c
+++ b/intern/opennl/superlu/smemory.c
@ -24,8 +24,8 @@

 /* Internal prototypes */
 void  *sexpand (int *, MemType,int, int, GlobalLU_t *);
-int   sLUWorkInit (int, int, int, int **, float **, LU_space_t);
-void  copy_mem_float (int, void *, void *);
+int   sLUWorkInit (int, int, int, int **, double **, LU_space_t);
+void  copy_mem_double (int, void *, void *);
 void  sStackCompress (GlobalLU_t *);
 void  sSetupSpace (void *, int, LU_space_t *);
 void  *suser_malloc (int, int);
@ -59,7 +59,7 @@ static int no_expand;
 #define NotDoubleAlign(addr) ( (intptr_t)addr & 7 )
 #define DoubleAlign(addr)    ( ((intptr_t)addr + 7) & ~7L )
 #define TempSpace(m, w)      ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
-			      (w + 1) * m * sizeof(float) )
+			      (w + 1) * m * sizeof(double) )
 #define Reduce(alpha)        ((alpha + 1) / 2)  /* i.e. (alpha-1)/2 + 1 */


@ -119,9 +119,9 @@ void suser_free(int bytes, int which_end)

 /*
 * mem_usage consists of the following fields:
- *    - for_lu (float)
+ *    - for_lu (double)
 *      The amount of space used in bytes for the L\U data structures.
- *    - total_needed (float)
+ *    - total_needed (double)
 *      The amount of space needed in bytes to perform factorization.
 *    - expansions (int)
 *      Number of memory expansions during the LU factorization.
@ -136,17 +136,17 @@ int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
    Ustore = U->Store;
    n = L->ncol;
    iword = sizeof(int);
-    dword = sizeof(float);
+    dword = sizeof(double);

    /* For LU factors */
-    mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
+    mem_usage->for_lu = (double)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
 				 dword + Lstore->rowind_colptr[n] * iword );
-    mem_usage->for_lu += (float)( (n + 1) * iword +
+    mem_usage->for_lu += (double)( (n + 1) * iword +
 				 Ustore->colptr[n] * (dword + iword) );

    /* Working storage to support factorization */
    mem_usage->total_needed = mem_usage->for_lu +
-	(float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
+	(double)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
 		(panel_size + 1) * n * dword );

    mem_usage->expansions = --no_expand;
@ -165,16 +165,16 @@ int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
 int
 sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
 	  int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
-	  int **iwork, float **dwork)
+	  int **iwork, double **dwork)
 {
    int      info, iword, dword;
    SCformat *Lstore;
    NCformat *Ustore;
    int      *xsup, *supno;
    int      *lsub, *xlsub;
-    float   *lusup;
+    double   *lusup;
    int      *xlusup;
-    float   *ucol;
+    double   *ucol;
    int      *usub, *xusub;
    int      nzlmax, nzumax, nzlumax;
    int      FILL = sp_ienv(6);
@ -182,7 +182,7 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
    Glu->n    = n;
    no_expand = 0;
    iword     = sizeof(int);
-    dword     = sizeof(float);
+    dword     = sizeof(double);

    if ( !expanders )	
        expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
@ -220,8 +220,8 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
 	    xusub  = (int *)suser_malloc((n+1) * iword, HEAD);
 	}

-	lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
-	ucol  = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
+	lusup = (double *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
+	ucol  = (double *) sexpand( &nzumax, UCOL, 0, 0, Glu );
 	lsub  = (int *)    sexpand( &nzlmax, LSUB, 0, 0, Glu );
 	usub  = (int *)    sexpand( &nzumax, USUB, 0, 1, Glu );

@ -241,8 +241,8 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
 		printf("Not enough memory to perform factorization.\n");
 		return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
 	    }
-	    lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
-	    ucol  = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
+	    lusup = (double *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
+	    ucol  = (double *) sexpand( &nzumax, UCOL, 0, 0, Glu );
 	    lsub  = (int *)    sexpand( &nzlmax, LSUB, 0, 0, Glu );
 	    usub  = (int *)    sexpand( &nzumax, USUB, 0, 1, Glu );
 	}
@ -307,16 +307,16 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
   returns the number of bytes allocated so far when failure occurred. */
 int
 sLUWorkInit(int m, int n, int panel_size, int **iworkptr, 
-            float **dworkptr, LU_space_t MemModel)
+            double **dworkptr, LU_space_t MemModel)
 {
    int    isize, dsize, extra;
-    float *old_ptr;
+    double *old_ptr;
    int    maxsuper = sp_ienv(3),
           rowblk   = sp_ienv(4);

    isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
    dsize = (m * panel_size +
-	     NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(float);
+	     NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(double);
    
    if ( MemModel == SYSTEM ) 
 	*iworkptr = (int *) intCalloc(isize/sizeof(int));
@ -328,13 +328,13 @@ sLUWorkInit(int m, int n, int panel_size, int **iworkptr,
    }

    if ( MemModel == SYSTEM )
-	*dworkptr = (float *) SUPERLU_MALLOC(dsize);
+	*dworkptr = (double *) SUPERLU_MALLOC(dsize);
    else {
-	*dworkptr = (float *) suser_malloc(dsize, TAIL);
+	*dworkptr = (double *) suser_malloc(dsize, TAIL);
 	if ( NotDoubleAlign(*dworkptr) ) {
 	    old_ptr = *dworkptr;
-	    *dworkptr = (float*) DoubleAlign(*dworkptr);
-	    *dworkptr = (float*) ((double*)*dworkptr - 1);
+	    *dworkptr = (double*) DoubleAlign(*dworkptr);
+	    *dworkptr = (double*) ((double*)*dworkptr - 1);
 	    extra = (char*)old_ptr - (char*)*dworkptr;
 #ifdef DEBUG	    
 	    printf("sLUWorkInit: not aligned, extra %d\n", extra);
@ -356,10 +356,10 @@ sLUWorkInit(int m, int n, int panel_size, int **iworkptr,
 * Set up pointers for real working arrays.
 */
 void
-sSetRWork(int m, int panel_size, float *dworkptr,
-	 float **dense, float **tempv)
+sSetRWork(int m, int panel_size, double *dworkptr,
+	 double **dense, double **tempv)
 {
-    float zero = 0.0;
+    double zero = 0.0;

    int maxsuper = sp_ienv(3),
        rowblk   = sp_ienv(4);
@ -372,7 +372,7 @@ sSetRWork(int m, int panel_size, float *dworkptr,
 /*
 * Free the working storage used by factor routines.
 */
-void sLUWorkFree(int *iwork, float *dwork, GlobalLU_t *Glu)
+void sLUWorkFree(int *iwork, double *dwork, GlobalLU_t *Glu)
 {
    if ( Glu->MemModel == SYSTEM ) {
 	SUPERLU_FREE (iwork);
@ -421,11 +421,11 @@ sLUMemXpand(int jcol,

    switch ( mem_type ) {
      case LUSUP:
-	Glu->lusup   = (float *) new_mem;
+	Glu->lusup   = (double *) new_mem;
 	Glu->nzlumax = *maxlen;
 	break;
      case UCOL:
-	Glu->ucol   = (float *) new_mem;
+	Glu->ucol   = (double *) new_mem;
 	Glu->nzumax = *maxlen;
 	break;
      case LSUB:
@ -445,11 +445,11 @@ sLUMemXpand(int jcol,


 void
-copy_mem_float(int howmany, void *old, void *new)
+copy_mem_double(int howmany, void *old, void *new)
 {
    register int i;
-    float *dold = old;
-    float *dnew = new;
+    double *dold = old;
+    double *dnew = new;
    for (i = 0; i < howmany; i++) dnew[i] = dold[i];
 }

@ -466,8 +466,8 @@ void
 	 GlobalLU_t *Glu  /* modified - global LU data structures */
 	)
 {
-    float    EXPAND = 1.5;
-    float    alpha;
+    double    EXPAND = 1.5;
+    double    alpha;
    void     *new_mem, *old_mem;
    int      new_len, tries, lword, extra, bytes_to_copy;

@ -480,7 +480,7 @@ void
    }
    
    if ( type == LSUB || type == USUB ) lword = sizeof(int);
-    else lword = sizeof(float);
+    else lword = sizeof(double);

    if ( Glu->MemModel == SYSTEM ) {
 	new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
@ -501,7 +501,7 @@ void
 	    if ( type == LSUB || type == USUB ) {
 		copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
 	    } else {
-		copy_mem_float(len_to_copy, expanders[type].mem, new_mem);
+		copy_mem_double(len_to_copy, expanders[type].mem, new_mem);
 	    }
 	    SUPERLU_FREE (expanders[type].mem);
 	}
@ -585,12 +585,12 @@ sStackCompress(GlobalLU_t *Glu)
    register int iword, dword, ndim;
    char    *last, *fragment;
    int      *ifrom, *ito;
-    float   *dfrom, *dto;
+    double   *dfrom, *dto;
    int      *xlsub, *lsub, *xusub, *usub, *xlusup;
-    float   *ucol, *lusup;
+    double   *ucol, *lusup;
    
    iword = sizeof(int);
-    dword = sizeof(float);
+    dword = sizeof(double);
    ndim = Glu->n;

    xlsub  = Glu->xlsub;
@ -602,8 +602,8 @@ sStackCompress(GlobalLU_t *Glu)
    lusup  = Glu->lusup;
    
    dfrom = ucol;
-    dto = (float *)((char*)lusup + xlusup[ndim] * dword);
-    copy_mem_float(xusub[ndim], dfrom, dto);
+    dto = (double *)((char*)lusup + xlusup[ndim] * dword);
+    copy_mem_double(xusub[ndim], dfrom, dto);
    ucol = dto;

    ifrom = lsub;
@ -637,32 +637,32 @@ sStackCompress(GlobalLU_t *Glu)
 * Allocate storage for original matrix A
 */
 void
-sallocateA(int n, int nnz, float **a, int **asub, int **xa)
+sallocateA(int n, int nnz, double **a, int **asub, int **xa)
 {
-    *a    = (float *) floatMalloc(nnz);
+    *a    = (double *) doubleMalloc(nnz);
    *asub = (int *) intMalloc(nnz);
    *xa   = (int *) intMalloc(n+1);
 }


-float *floatMalloc(int n)
+double *doubleMalloc(int n)
 {
-    float *buf;
-    buf = (float *) SUPERLU_MALLOC(n * sizeof(float)); 
+    double *buf;
+    buf = (double *) SUPERLU_MALLOC(n * sizeof(double)); 
    if ( !buf ) {
-	ABORT("SUPERLU_MALLOC failed for buf in floatMalloc()\n");
+	ABORT("SUPERLU_MALLOC failed for buf in doubleMalloc()\n");
    }
    return (buf);
 }

-float *floatCalloc(int n)
+double *doubleCalloc(int n)
 {
-    float *buf;
+    double *buf;
    register int i;
-    float zero = 0.0;
-    buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
+    double zero = 0.0;
+    buf = (double *) SUPERLU_MALLOC(n * sizeof(double));
    if ( !buf ) {
-	ABORT("SUPERLU_MALLOC failed for buf in floatCalloc()\n");
+	ABORT("SUPERLU_MALLOC failed for buf in doubleCalloc()\n");
    }
    for (i = 0; i < n; ++i) buf[i] = zero;
    return (buf);
@ -675,7 +675,7 @@ int smemory_usage(const int nzlmax, const int nzumax,
    register int iword, dword;

    iword   = sizeof(int);
-    dword   = sizeof(float);
+    dword   = sizeof(double);
    
    return (10 * n * iword +
 	    nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
--- a/intern/opennl/superlu/smyblas2.c
+++ b/intern/opennl/superlu/smyblas2.c
@ -25,17 +25,17 @@
 */

 /* local prototypes*/
-void slsolve ( int, int, float *, float *);
-void susolve ( int, int, float *, float *);
-void smatvec ( int, int, int, float *, float *, float *);
+void slsolve ( int, int, double *, double *);
+void susolve ( int, int, double *, double *);
+void smatvec ( int, int, int, double *, double *, double *);


-void slsolve ( int ldm, int ncol, float *M, float *rhs )
+void slsolve ( int ldm, int ncol, double *M, double *rhs )
 {
    int k;
-    float x0, x1, x2, x3, x4, x5, x6, x7;
-    float *M0;
-    register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
+    double x0, x1, x2, x3, x4, x5, x6, x7;
+    double *M0;
+    register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
    register int firstcol = 0;

    M0 = &M[0];
@ -131,10 +131,10 @@ void
 susolve ( ldm, ncol, M, rhs )
 int ldm;	/* in */
 int ncol;	/* in */
-float *M;	/* in */
-float *rhs;	/* modified */
+double *M;	/* in */
+double *rhs;	/* modified */
 {
-    float xj;
+    double xj;
    int jcol, j, irow;

    jcol = ncol - 1;
@ -162,14 +162,14 @@ void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
 int ldm;	/* in -- leading dimension of M */
 int nrow;	/* in */ 
 int ncol;	/* in */
-float *M;	/* in */
-float *vec;	/* in */
-float *Mxvec;	/* in/out */
+double *M;	/* in */
+double *vec;	/* in */
+double *Mxvec;	/* in/out */

 {
-    float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
-    float *M0;
-    register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
+    double vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
+    double *M0;
+    register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
    register int firstcol = 0;
    int k;

--- a/intern/opennl/superlu/spanel_bmod.c
+++ b/intern/opennl/superlu/spanel_bmod.c
@ -29,8 +29,8 @@
 /* 
 * Function prototypes 
 */
-void slsolve(int, int, float *, float *);
-void smatvec(int, int, int, float *, float *, float *);
+void slsolve(int, int, double *, double *);
+void smatvec(int, int, int, double *, double *, double *);
 extern void scheck_tempv();

 void
@ -39,8 +39,8 @@ spanel_bmod (
 	    const int  w,          /* in */
 	    const int  jcol,       /* in */
 	    const int  nseg,       /* in */
-	    float     *dense,     /* out, of size n by w */
-	    float     *tempv,     /* working array */
+	    double     *dense,     /* out, of size n by w */
+	    double     *tempv,     /* working array */
 	    int        *segrep,    /* in */
 	    int        *repfnz,    /* in, of size n by w */
 	    GlobalLU_t *Glu,       /* modified */
@ -71,13 +71,13 @@ spanel_bmod (
         ftcs3 = _cptofcd("U", strlen("U"));
 #endif
    int          incx = 1, incy = 1;
-    float       alpha, beta;
+    double       alpha, beta;
 #endif

    register int k, ksub;
    int          fsupc, nsupc, nsupr, nrow;
    int          krep, krep_ind;
-    float       ukj, ukj1, ukj2;
+    double       ukj, ukj1, ukj2;
    int          luptr, luptr1, luptr2;
    int          segsze;
    int          block_nrow;  /* no of rows in a block row */
@ -87,13 +87,13 @@ spanel_bmod (
    register int jj;	      /* Index through each column in the panel */
    int          *xsup, *supno;
    int          *lsub, *xlsub;
-    float       *lusup;
+    double       *lusup;
    int          *xlusup;
    int          *repfnz_col; /* repfnz[] for a column in the panel */
-    float       *dense_col;  /* dense[] for a column in the panel */
-    float       *tempv1;             /* Used in 1-D update */
-    float       *TriTmp, *MatvecTmp; /* used in 2-D update */
-    float      zero = 0.0;
+    double       *dense_col;  /* dense[] for a column in the panel */
+    double       *tempv1;             /* Used in 1-D update */
+    double       *TriTmp, *MatvecTmp; /* used in 2-D update */
+    double      zero = 0.0;
    register int ldaTmp;
    register int r_ind, r_hi;
    static   int first = 1, maxsuper, rowblk, colblk;
--- a/intern/opennl/superlu/spanel_dfs.c
+++ b/intern/opennl/superlu/spanel_dfs.c
@ -34,7 +34,7 @@ spanel_dfs (
 	   SuperMatrix *A,       /* in - original matrix */
 	   int        *perm_r,     /* in */
 	   int        *nseg,	   /* out */
-	   float     *dense,      /* out */
+	   double     *dense,      /* out */
 	   int        *panel_lsub, /* out */
 	   int        *segrep,     /* out */
 	   int        *repfnz,     /* out */
@ -74,7 +74,7 @@ spanel_dfs (
 *
 */
    NCPformat *Astore;
-    float    *a;
+    double    *a;
    int       *asub;
    int       *xa_begin, *xa_end;
    int	      krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
@ -84,7 +84,7 @@ spanel_dfs (
    int       *marker1;	   /* marker1[jj] >= jcol if vertex jj was visited 
 			      by a previous column within this panel.   */
    int       *repfnz_col; /* start of each column in the panel */
-    float    *dense_col;  /* start of each column in the panel */
+    double    *dense_col;  /* start of each column in the panel */
    int       nextl_col;   /* next available position in panel_lsub[*,jj] */
    int       *xsup, *supno;
    int       *lsub, *xlsub;
--- a/intern/opennl/superlu/spivotL.c
+++ b/intern/opennl/superlu/spivotL.c
@ -31,7 +31,7 @@
 int
 spivotL(
        const int  jcol,     /* in */
-        const float u,      /* in - diagonal pivoting threshold */
+        const double u,      /* in - diagonal pivoting threshold */
        int        *usepr,   /* re-use the pivot sequence given by perm_r/iperm_r */
        int        *perm_r,  /* may be modified */
        int        *iperm_r, /* in - inverse of perm_r */
@ -67,14 +67,14 @@ spivotL(
    int          nsupr;     /* no of rows in the supernode */
    int          lptr;	    /* points to the starting subscript of the supernode */
    int          pivptr, old_pivptr, diag, diagind;
-    float       pivmax, rtemp, thresh;
-    float       temp;
-    float       *lu_sup_ptr; 
-    float       *lu_col_ptr;
+    double       pivmax, rtemp, thresh;
+    double       temp;
+    double       *lu_sup_ptr; 
+    double       *lu_col_ptr;
    int          *lsub_ptr;
    int          isub, icol, k, itemp;
    int          *lsub, *xlsub;
-    float       *lusup;
+    double       *lusup;
    int          *xlusup;
    flops_t      *ops = stat->ops;

--- a/intern/opennl/superlu/spruneL.c
+++ b/intern/opennl/superlu/spruneL.c
@ -45,13 +45,13 @@ spruneL(
 *   contains the current pivot row "pivrow"
 *
 */
-    float     utemp;
+    double     utemp;
    int        jsupno, irep, irep1, kmin, kmax, krow, movnum;
    int        i, ktemp, minloc, maxloc;
    int        do_prune; /* logical variable */
    int        *xsup, *supno;
    int        *lsub, *xlsub;
-    float     *lusup;
+    double     *lusup;
    int        *xlusup;

    xsup       = Glu->xsup;
--- a/intern/opennl/superlu/ssnode_bmod.c
+++ b/intern/opennl/superlu/ssnode_bmod.c
@ -24,8 +24,8 @@

 #include "ssp_defs.h"

-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
+void slsolve(int, int, double*, double*);
+void smatvec(int, int, int, double*, double*, double*);

 /*
 * Performs numeric block updates within the relaxed snode. 
@ -34,8 +34,8 @@ int
 ssnode_bmod (
 	    const int  jcol,	  /* in */
 	    const int  fsupc,     /* in */
-	    float     *dense,    /* in */
-	    float     *tempv,    /* working array */
+	    double     *dense,    /* in */
+	    double     *tempv,    /* working array */
 	    GlobalLU_t *Glu,      /* modified */
 	    SuperLUStat_t *stat   /* output */
 	    )
@ -47,14 +47,14 @@ ssnode_bmod (
 	 ftcs3 = _cptofcd("U", strlen("U"));
 #endif
    int            incx = 1, incy = 1;
-    float         alpha = -1.0, beta = 1.0;
+    double         alpha = -1.0, beta = 1.0;
 #endif

    int            luptr, nsupc, nsupr, nrow;
    int            isub, irow, i, iptr; 
    register int   ufirst, nextlu;
    int            *lsub, *xlsub;
-    float         *lusup;
+    double         *lusup;
    int            *xlusup;
    flops_t *ops = stat->ops;

--- a/intern/opennl/superlu/ssp_blas2.c
+++ b/intern/opennl/superlu/ssp_blas2.c
@ -19,14 +19,14 @@
 /* 
 * Function prototypes 
 */
-void susolve(int, int, float*, float*);
-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
-int strsv_(char*, char*, char*, int*, float*, int*, float*, int*);
+void susolve(int, int, double*, double*);
+void slsolve(int, int, double*, double*);
+void smatvec(int, int, int, double*, double*, double*);
+int strsv_(char*, char*, char*, int*, double*, int*, double*, int*);

 int
 sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
-         SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
+         SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info)
 {
 /*
 *   Purpose
@ -71,7 +71,7 @@ sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
 *	        The factor U from the factorization Pr*A*Pc=L*U.
 *	        U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
 *    
- *   x       - (input/output) float*
+ *   x       - (input/output) double*
 *             Before entry, the incremented array X must contain the n   
 *             element right-hand side vector b. On exit, X is overwritten 
 *             with the solution vector x.
@ -87,12 +87,12 @@ sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
 #endif
    SCformat *Lstore;
    NCformat *Ustore;
-    float   *Lval, *Uval;
+    double   *Lval, *Uval;
    int incx = 1;
    int nrow;
    int fsupc, nsupr, nsupc, luptr, istart, irow;
    int i, k, iptr, jcol;
-    float *work;
+    double *work;
    flops_t solve_ops;

    /* Test the input parameters */
@ -115,7 +115,7 @@ sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
    Uval = Ustore->nzval;
    solve_ops = 0;

-    if ( !(work = floatCalloc(L->nrow)) )
+    if ( !(work = doubleCalloc(L->nrow)) )
 	ABORT("Malloc fails for work in sp_strsv().");
    
    if ( lsame_(trans, "N") ) {	/* Form x := inv(A)*x. */
@ -306,8 +306,8 @@ sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,


 int
-sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x, 
-	 int incx, float beta, float *y, int incy)
+sp_sgemv(char *trans, double alpha, SuperMatrix *A, double *x, 
+	 int incx, double beta, double *y, int incy)
 {
 /*  Purpose   
    =======   
@ -327,7 +327,7 @@ sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   

-    ALPHA  - (input) float
+    ALPHA  - (input) double
             On entry, ALPHA specifies the scalar alpha.   

    A      - (input) SuperMatrix*
@ -336,7 +336,7 @@ sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
                 Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. 
             In the future, more general A can be handled.

-    X      - (input) float*, array of DIMENSION at least   
+    X      - (input) double*, array of DIMENSION at least   
             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
@ -347,11 +347,11 @@ sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   

-    BETA   - (input) float
+    BETA   - (input) double
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then Y need not be set on input.   

-    Y      - (output) float*,  array of DIMENSION at least   
+    Y      - (output) double*,  array of DIMENSION at least   
             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
@ -368,9 +368,9 @@ sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,

    /* Local variables */
    NCformat *Astore;
-    float   *Aval;
+    double   *Aval;
    int info;
-    float temp;
+    double temp;
    int lenx, leny, i, j, irow;
    int iy, jx, jy, kx, ky;
    int notran;
--- a/intern/opennl/superlu/ssp_blas3.c
+++ b/intern/opennl/superlu/ssp_blas3.c
@ -20,8 +20,8 @@

 int
 sp_sgemm(char *transa, int n, 
-         float alpha, SuperMatrix *A, float *b, int ldb, 
-         float beta, float *c, int ldc)
+         double alpha, SuperMatrix *A, double *b, int ldb, 
+         double beta, double *c, int ldc)
 {
 /*  Purpose   
    =======   
@ -74,7 +74,7 @@ sp_sgemm(char *transa, int n,
 	     be at least  zero.   
             Unchanged on exit.
 	     
-    ALPHA  - (input) float
+    ALPHA  - (input) double
             On entry, ALPHA specifies the scalar alpha.   

    A      - (input) SuperMatrix*
@ -96,7 +96,7 @@ sp_sgemm(char *transa, int n,
             in the calling (sub) program. LDB must be at least max( 1, n ).  
             Unchanged on exit.   

-    BETA   - (input) float
+    BETA   - (input) double
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then C need not be set on input.   

--- a/intern/opennl/superlu/ssp_defs.h
+++ b/intern/opennl/superlu/ssp_defs.h
@ -88,9 +88,9 @@ typedef struct {
    int     *supno;   
    int     *lsub;    /* compressed L subscripts */
    int	    *xlsub;
-    float  *lusup;   /* L supernodes */
+    double  *lusup;   /* L supernodes */
    int     *xlusup;
-    float  *ucol;    /* U columns */
+    double  *ucol;    /* U columns */
    int     *usub;
    int	    *xusub;
    int     nzlmax;   /* current max size of lsub */
@ -101,8 +101,8 @@ typedef struct {
 } GlobalLU_t;

 typedef struct {
-    float for_lu;
-    float total_needed;
+    double for_lu;
+    double total_needed;
    int   expansions;
 } mem_usage_t;

@ -116,61 +116,61 @@ sgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
      SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
 extern void
 sgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
-       char *, float *, float *, SuperMatrix *, SuperMatrix *,
+       char *, double *, double *, SuperMatrix *, SuperMatrix *,
       void *, int, SuperMatrix *, SuperMatrix *,
-       float *, float *, float *, float *,
+       double *, double *, double *, double *,
       mem_usage_t *, SuperLUStat_t *, int *);

 /* Supernodal LU factor related */
 extern void
-sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, float *,
+sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, double *,
 		       int *, int *, Stype_t, Dtype_t, Mtype_t);
 extern void
-sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, float *,
+sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, double *,
 		       int *, int *, Stype_t, Dtype_t, Mtype_t);
 extern void
 sCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
 extern void
-sCreate_Dense_Matrix(SuperMatrix *, int, int, float *, int,
+sCreate_Dense_Matrix(SuperMatrix *, int, int, double *, int,
 		     Stype_t, Dtype_t, Mtype_t);
 extern void
-sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, float *, 
+sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, double *, 
 		         int *, int *, int *, int *, int *,
 			 Stype_t, Dtype_t, Mtype_t);
 extern void
-sCopy_Dense_Matrix(int, int, float *, int, float *, int);
+sCopy_Dense_Matrix(int, int, double *, int, double *, int);

 extern void    countnz (const int, int *, int *, int *, GlobalLU_t *);
 extern void    fixupL (const int, const int *, GlobalLU_t *);

-extern void    sallocateA (int, int, float **, int **, int **);
+extern void    sallocateA (int, int, double **, int **, int **);
 extern void    sgstrf (superlu_options_t*, SuperMatrix*, 
                       int, int, int*, void *, int, int *, int *, 
                       SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
 extern int     ssnode_dfs (const int, const int, const int *, const int *,
 			     const int *, int *, int *, GlobalLU_t *);
-extern int     ssnode_bmod (const int, const int, float *,
-                              float *, GlobalLU_t *, SuperLUStat_t*);
+extern int     ssnode_bmod (const int, const int, double *,
+                              double *, GlobalLU_t *, SuperLUStat_t*);
 extern void    spanel_dfs (const int, const int, const int, SuperMatrix *,
-			   int *, int *, float *, int *, int *, int *,
+			   int *, int *, double *, int *, int *, int *,
 			   int *, int *, int *, int *, GlobalLU_t *);
 extern void    spanel_bmod (const int, const int, const int, const int,
-                           float *, float *, int *, int *,
+                           double *, double *, int *, int *,
 			   GlobalLU_t *, SuperLUStat_t*);
 extern int     scolumn_dfs (const int, const int, int *, int *, int *, int *,
 			   int *, int *, int *, int *, int *, GlobalLU_t *);
-extern int     scolumn_bmod (const int, const int, float *,
-			   float *, int *, int *, int,
+extern int     scolumn_bmod (const int, const int, double *,
+			   double *, int *, int *, int,
                           GlobalLU_t *, SuperLUStat_t*);
 extern int     scopy_to_ucol (int, int, int *, int *, int *,
-                              float *, GlobalLU_t *);         
-extern int     spivotL (const int, const float, int *, int *, 
+                              double *, GlobalLU_t *);         
+extern int     spivotL (const int, const double, int *, int *, 
                         int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
 extern void    spruneL (const int, const int *, const int, const int,
 			  const int *, const int *, int *, GlobalLU_t *);
-extern void    sreadmt (int *, int *, int *, float **, int **, int **);
-extern void    sGenXtrue (int, int, float *, int);
-extern void    sFillRHS (trans_t, int, float *, int, SuperMatrix *,
+extern void    sreadmt (int *, int *, int *, double **, int **, int **);
+extern void    sGenXtrue (int, int, double *, int);
+extern void    sFillRHS (trans_t, int, double *, int, SuperMatrix *,
 			  SuperMatrix *);
 extern void    sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
                        SuperMatrix *, SuperLUStat_t*, int *);
@ -178,56 +178,56 @@ extern void    sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,

 /* Driver related */

-extern void    sgsequ (SuperMatrix *, float *, float *, float *,
-			float *, float *, int *);
-extern void    slaqgs (SuperMatrix *, float *, float *, float,
-                        float, float, char *);
+extern void    sgsequ (SuperMatrix *, double *, double *, double *,
+			double *, double *, int *);
+extern void    slaqgs (SuperMatrix *, double *, double *, double,
+                        double, double, char *);
 extern void    sgscon (char *, SuperMatrix *, SuperMatrix *, 
-		         float, float *, SuperLUStat_t*, int *);
-extern float   sPivotGrowth(int, SuperMatrix *, int *, 
+		         double, double *, SuperLUStat_t*, int *);
+extern double   sPivotGrowth(int, SuperMatrix *, int *, 
                            SuperMatrix *, SuperMatrix *);
 extern void    sgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
-                       SuperMatrix *, int *, int *, char *, float *, 
-                       float *, SuperMatrix *, SuperMatrix *,
-                       float *, float *, SuperLUStat_t*, int *);
+                       SuperMatrix *, int *, int *, char *, double *, 
+                       double *, SuperMatrix *, SuperMatrix *,
+                       double *, double *, SuperLUStat_t*, int *);

 extern int     sp_strsv (char *, char *, char *, SuperMatrix *,
-			SuperMatrix *, float *, SuperLUStat_t*, int *);
-extern int     sp_sgemv (char *, float, SuperMatrix *, float *,
-			int, float, float *, int);
+			SuperMatrix *, double *, SuperLUStat_t*, int *);
+extern int     sp_sgemv (char *, double, SuperMatrix *, double *,
+			int, double, double *, int);

-extern int     sp_sgemm (char *, int, float,
-			SuperMatrix *, float *, int, float, 
-			float *, int);
+extern int     sp_sgemm (char *, int, double,
+			SuperMatrix *, double *, int, double, 
+			double *, int);

 /* Memory-related */
 extern int     sLUMemInit (fact_t, void *, int, int, int, int, int,
 			     SuperMatrix *, SuperMatrix *,
-			     GlobalLU_t *, int **, float **);
-extern void    sSetRWork (int, int, float *, float **, float **);
-extern void    sLUWorkFree (int *, float *, GlobalLU_t *);
+			     GlobalLU_t *, int **, double **);
+extern void    sSetRWork (int, int, double *, double **, double **);
+extern void    sLUWorkFree (int *, double *, GlobalLU_t *);
 extern int     sLUMemXpand (int, int, MemType, int *, GlobalLU_t *);

-extern float  *floatMalloc(int);
-extern float  *floatCalloc(int);
+extern double  *doubleMalloc(int);
+extern double  *doubleCalloc(int);
 extern int     smemory_usage(const int, const int, const int, const int);
 extern int     sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);

 /* Auxiliary routines */
-extern void    sreadhb(int *, int *, int *, float **, int **, int **);
-extern void    sCompRow_to_CompCol(int, int, int, float*, int*, int*,
-		                   float **, int **, int **);
-extern void    sfill (float *, int, float);
-extern void    sinf_norm_error (int, SuperMatrix *, float *);
+extern void    sreadhb(int *, int *, int *, double **, int **, int **);
+extern void    sCompRow_to_CompCol(int, int, int, double*, int*, int*,
+		                   double **, int **, int **);
+extern void    sfill (double *, int, double);
+extern void    sinf_norm_error (int, SuperMatrix *, double *);
 extern void    PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
-			 float, float, float *, float *, char *);
+			 double, double, double *, double *, char *);

 /* Routines for debugging */
 extern void    sPrint_CompCol_Matrix(char *, SuperMatrix *);
 extern void    sPrint_SuperNode_Matrix(char *, SuperMatrix *);
 extern void    sPrint_Dense_Matrix(char *, SuperMatrix *);
 extern void    print_lu_col(char *, int, int, int *, GlobalLU_t *);
-extern void    check_tempv(int, float *);
+extern void    check_tempv(int, double *);
 extern int		print_int_vec(char *what, int n, int *vec);

 extern int		sp_symetree(int *acolst, int *acolend, int *arow, int n, int *parent);
--- a/intern/opennl/superlu/strsv.c
+++ b/intern/opennl/superlu/strsv.c
@ -1,17 +1,17 @@
 /** \file opennl/superlu/strsv.c
 *  \ingroup opennl
 */
-int strsv_(char *, char *, char *, int *, float *, int *, float *, int *);
+int strsv_(char *, char *, char *, int *, double *, int *, double *, int *);


 /* Subroutine */ int strsv_(char *uplo, char *trans, char *diag, int *n, 
-	float *a, int *lda, float *x, int *incx)
+	double *a, int *lda, double *x, int *incx)
 {


    /* Local variables */
    static int info;
-    static float temp;
+    static double temp;
    static int i, j;
    extern int lsame_(char *, char *);
    static int ix, jx, kx;
--- a/intern/opennl/superlu/sutil.c
+++ b/intern/opennl/superlu/sutil.c
@ -27,15 +27,15 @@

 /* prototypes */
 void sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu);
-void scheck_tempv(int n, float *tempv);
-void sPrintPerf(SuperMatrix *, SuperMatrix *, mem_usage_t *,float , float , 
-				 float *, float *, char *, SuperLUStat_t *);
-int print_float_vec(char *what, int n, float *vec);
+void scheck_tempv(int n, double *tempv);
+void sPrintPerf(SuperMatrix *, SuperMatrix *, mem_usage_t *,double , double , 
+				 double *, double *, char *, SuperLUStat_t *);
+int print_double_vec(char *what, int n, double *vec);
 /* ********** */

 void
 sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-		       float *nzval, int *rowind, int *colptr,
+		       double *nzval, int *rowind, int *colptr,
 		       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
 {
    NCformat *Astore;
@ -56,7 +56,7 @@ sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,

 void
 sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-		       float *nzval, int *colind, int *rowptr,
+		       double *nzval, int *colind, int *rowptr,
 		       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
 {
    NRformat *Astore;
@ -91,14 +91,14 @@ sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
    Bstore   = (NCformat *) B->Store;
    Bstore->nnz = nnz = Astore->nnz;
    for (i = 0; i < nnz; ++i)
-	((float *)Bstore->nzval)[i] = ((float *)Astore->nzval)[i];
+	((double *)Bstore->nzval)[i] = ((double *)Astore->nzval)[i];
    for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
    for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
 }


 void
-sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx,
+sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, double *x, int ldx,
 		    Stype_t stype, Dtype_t dtype, Mtype_t mtype)
 {
    DNformat    *Xstore;
@ -112,12 +112,12 @@ sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx,
    if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
    Xstore = (DNformat *) X->Store;
    Xstore->lda = ldx;
-    Xstore->nzval = (float *) x;
+    Xstore->nzval = (double *) x;
 }

 void
-sCopy_Dense_Matrix(int M, int N, float *X, int ldx,
-			float *Y, int ldy)
+sCopy_Dense_Matrix(int M, int N, double *X, int ldx,
+			double *Y, int ldy)
 {
 /*
 *
@ -135,7 +135,7 @@ sCopy_Dense_Matrix(int M, int N, float *X, int ldx,

 void
 sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, 
-			float *nzval, int *nzval_colptr, int *rowind,
+			double *nzval, int *nzval_colptr, int *rowind,
 			int *rowind_colptr, int *col_to_sup, int *sup_to_col,
 			Stype_t stype, Dtype_t dtype, Mtype_t mtype)
 {
@ -166,14 +166,14 @@ sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
 */
 void
 sCompRow_to_CompCol(int m, int n, int nnz, 
-		    float *a, int *colind, int *rowptr,
-		    float **at, int **rowind, int **colptr)
+		    double *a, int *colind, int *rowptr,
+		    double **at, int **rowind, int **colptr)
 {
    register int i, j, col, relpos;
    int *marker;

    /* Allocate storage for another copy of the matrix. */
-    *at = (float *) floatMalloc(nnz);
+    *at = (double *) doubleMalloc(nnz);
    *rowind = (int *) intMalloc(nnz);
    *colptr = (int *) intMalloc(n+1);
    marker = (int *) intCalloc(n);
@ -207,13 +207,13 @@ sPrint_CompCol_Matrix(char *what, SuperMatrix *A)
 {
    NCformat     *Astore;
    register int i,n;
-    float       *dp;
+    double       *dp;
    
    printf("\nCompCol matrix %s:\n", what);
    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
    n = A->ncol;
    Astore = (NCformat *) A->Store;
-    dp = (float *) Astore->nzval;
+    dp = (double *) Astore->nzval;
    printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
    printf("nzval: ");
    for (i = 0; i < Astore->colptr[n]; ++i) printf("%f  ", dp[i]);
@ -230,14 +230,14 @@ sPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
 {
    SCformat     *Astore;
    register int i, j, k, c, d, n, nsup;
-    float       *dp;
+    double       *dp;
    int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
    
    printf("\nSuperNode matrix %s:\n", what);
    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
    n = A->ncol;
    Astore = (SCformat *) A->Store;
-    dp = (float *) Astore->nzval;
+    dp = (double *) Astore->nzval;
    col_to_sup = Astore->col_to_sup;
    sup_to_col = Astore->sup_to_col;
    rowind_colptr = Astore->rowind_colptr;
@ -279,12 +279,12 @@ sPrint_Dense_Matrix(char *what, SuperMatrix *A)
 {
    DNformat     *Astore;
    register int i;
-    float       *dp;
+    double       *dp;
    
    printf("\nDense matrix %s:\n", what);
    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
    Astore = (DNformat *) A->Store;
-    dp = (float *) Astore->nzval;
+    dp = (double *) Astore->nzval;
    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
    printf("\nnzval: ");
    for (i = 0; i < A->nrow; ++i) printf("%f  ", dp[i]);
@ -301,9 +301,9 @@ sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
    int     i, k, fsupc;
    int     *xsup, *supno;
    int     *xlsub, *lsub;
-    float  *lusup;
+    double  *lusup;
    int     *xlusup;
-    float  *ucol;
+    double  *ucol;
    int     *usub, *xusub;

    xsup    = Glu->xsup;
@ -339,7 +339,7 @@ sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
 * Check whether tempv[] == 0. This should be true before and after 
 * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". 
 */
-void scheck_tempv(int n, float *tempv)
+void scheck_tempv(int n, double *tempv)
 {
    int i;
 	
@ -354,12 +354,12 @@ void scheck_tempv(int n, float *tempv)


 void
-sGenXtrue(int n, int nrhs, float *x, int ldx)
+sGenXtrue(int n, int nrhs, double *x, int ldx)
 {
    int  i, j;
    for (j = 0; j < nrhs; ++j)
 	for (i = 0; i < n; ++i) {
-	    x[i + j*ldx] = 1.0;/* + (float)(i+1.)/n;*/
+	    x[i + j*ldx] = 1.0;/* + (double)(i+1.)/n;*/
 	}
 }

@ -367,13 +367,13 @@ sGenXtrue(int n, int nrhs, float *x, int ldx)
 * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
 */
 void
-sFillRHS(trans_t trans, int nrhs, float *x, int ldx,
+sFillRHS(trans_t trans, int nrhs, double *x, int ldx,
         SuperMatrix *A, SuperMatrix *B)
 {
    DNformat *Bstore;
-    float   *rhs;
-    float one = 1.0;
-    float zero = 0.0;
+    double   *rhs;
+    double one = 1.0;
+    double zero = 0.0;
    int      ldc;
    char transc[1];

@ -390,10 +390,10 @@ sFillRHS(trans_t trans, int nrhs, float *x, int ldx,
 }

 /* 
- * Fills a float precision array with a given value.
+ * Fills a double precision array with a given value.
 */
 void 
-sfill(float *a, int alen, float dval)
+sfill(double *a, int alen, double dval)
 {
    register int i;
    for (i = 0; i < alen; i++) a[i] = dval;
@ -404,11 +404,11 @@ sfill(float *a, int alen, float dval)
 /* 
 * Check the inf-norm of the error vector 
 */
-void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
+void sinf_norm_error(int nrhs, SuperMatrix *X, double *xtrue)
 {
    DNformat *Xstore;
-    float err, xnorm;
-    float *Xmat, *soln_work;
+    double err, xnorm;
+    double *Xmat, *soln_work;
    int i, j;

    Xstore = X->Store;
@ -431,8 +431,8 @@ void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
 /* Print performance of the code. */
 void
 sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
-           float rpg, float rcond, float *ferr,
-           float *berr, char *equed, SuperLUStat_t *stat)
+           double rpg, double rcond, double *ferr,
+           double *berr, char *equed, SuperLUStat_t *stat)
 {
    SCformat *Lstore;
    NCformat *Ustore;
@ -475,7 +475,7 @@ sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,



-int print_float_vec(char *what, int n, float *vec)
+int print_double_vec(char *what, int n, double *vec)
 {
    int i;
    printf("%s: n %d\n", what, n);
--- a/intern/opennl/superlu/util.c
+++ b/intern/opennl/superlu/util.c
@ -28,8 +28,8 @@
 /* prototypes */
 flops_t LUFactFlops(SuperLUStat_t *stat);
 flops_t LUSolveFlops(SuperLUStat_t *stat);
-float SpaSize(int n, int np, float sum_npw);
-float DenseSize(int n, float sum_nw);
+double SpaSize(int n, int np, double sum_npw);
+double DenseSize(int n, double sum_nw);

 /* 
 * Global statistics variale
@ -332,27 +332,27 @@ void super_stats(int nsuper, int *xsup)

    for (i = 0; i <= nsuper; i++) {
        isize = xsup[i+1] - xsup[i];
-        whichb = (float) isize / max_sup_size * NBUCKS;
+        whichb = (double) isize / max_sup_size * NBUCKS;
        if (whichb >= NBUCKS) whichb = NBUCKS - 1;
        bucket[whichb]++;
    }
    
    printf("\tHistogram of supernode sizes:\n");
    for (i = 0; i < NBUCKS; i++) {
-        bl = (float) i * max_sup_size / NBUCKS;
-        bh = (float) (i+1) * max_sup_size / NBUCKS;
+        bl = (double) i * max_sup_size / NBUCKS;
+        bh = (double) (i+1) * max_sup_size / NBUCKS;
        printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]);
    }

 }


-float SpaSize(int n, int np, float sum_npw)
+double SpaSize(int n, int np, double sum_npw)
 {
    return (sum_npw*8 + np*8 + n*4)/1024.;
 }

-float DenseSize(int n, float sum_nw)
+double DenseSize(int n, double sum_nw)
 {
    return (sum_nw*8 + n*8)/1024.;;
 }
--- a/intern/opennl/superlu/util.h
+++ b/intern/opennl/superlu/util.h
@ -97,7 +97,7 @@ typedef enum {
    RCOND,   /* estimate reciprocal condition number */
    SOLVE,   /* forward and back solves */
    REFINE,  /* perform iterative refinement */
-    SLU_FLOAT,   /* time spent in floating-point operations */
+    SLU_FLOAT,   /* time spent in doubleing-point operations */
    TRSV,    /* fraction of FACT spent in xTRSV */
    GEMV,    /* fraction of FACT spent in xGEMV */
    FERR,    /* estimate error bounds after iterative refinement */
@ -108,7 +108,7 @@ typedef enum {
 /***********************************************************************
 * Type definitions
 ***********************************************************************/
-typedef float    flops_t;
+typedef double    flops_t;
 typedef unsigned char Logical;

 /*