// 
// NagMatrix.cc
// 
// A class for manipulating (2D) matrices (uses either NAG or BLAS/LAPACK library)
//

#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <cstring>  // for memcpy

#include "NagMatrix.h"
#include "tracker_defines_types_and_helpers.h"
#include "text_output.h"

namespace ReadingPeopleTracker
{

#ifdef USE_FLOAT
// single precision math
#define dsyev_ ssyev_
#define dgetrf_ sgetrf_
#define dgetri_ sgetri_
#define dgemv_ sgemv_
#define dgemm_ sgemm_
#define dgeev_ sgeev_
#define dgeqrf_ sgeqrf_
#define dorgqr_ sorgqr_
#define dgesvd_ sgesvd_
#define dgels_ sgels_
// please note: the following conversions might not be needed anymore
// as this was NAG's old naming scheme: ...f for double, ...e for single
// precision.   Please check with your NAG implementation. 
#define f01aaf_ f01aae_
#define f02abf_ f02abe_
#define f02agf_ f02age_
#define f01qcf_ f01qce_
#define f01qef_ f01qee_
#define f02wef_ f02wee_
#define f04jgf_ f04jge_
#endif

#ifdef USE_LAPACK

#ifdef WIN32

#include <mkl.h>  // Intel Math Kernel Library, a BLAS/LAPACK implementation

#ifdef USE_FLOAT
// single precision math
#define dsyev_ SSYEV
#define dgetrf_ SGETRF
#define dgetri_ SGETRI
#define dgemm_ SGEMM
#define dgeev_ SGEEV
#define dgemv_ SGEMV
#define dgeqrf_ SGEQRF
#define dorgqr_ SORGQR
#define dgesvd_ SGESVD
#define dgels_ SGELS
#else
// double precision math
#define dsyev_ DSYEV
#define dgetrf_ DGETRF
#define dgetri_ DGETRI
#define dgemv_ DGEMV
#define dgemm_ DGEMM
#define dgeev_ DGEEV
#define dgeqrf_ DGEQRF
#define dorgqr_ DORGQR
#define dgesvd_ DGESVD
#define dgels_ DGELS
#endif // ifdef USE_FLOAT

#else  // ifdef WIN32

extern "C"
{
    int ilaenv_(int *ispec, char *name, char *opts, int *n1,
		int *n2, int *n3, int *n4);
    void dsyev_(char *jobz, char *uplo, int *n, realno *a,
		int *lda, realno *w, realno *work, int *lwork, int *info);
    
    void dgetrf_(int *m, int *n, realno *a, int * lda, int *ipiv, int *info);
    
    void dgetri_ (int *n, realno *a, int *lda, int *ipiv, realno *work, int *lwork, int *info);
    
    void dgeev_ (char *jobvl, char *jobvr, int *n, realno *a, int *lda,
		 realno *wr, realno *wi, realno *vl, int *ldvl, 
		 realno *vr, int *ldvr, realno *work, int *lwork, int *info);    

    void dgeqrf_(int *m, int *n, realno *a, int *lda, realno *tau, realno *work, 
	         int *lwork, int *info);

    void dorgqr_(int *m, int *n, int *k, realno *a, int *lda, realno *tau, realno *work,
		 int *lwork, int *info);	

    void dgesvd_(char *cha, char *chb, int *m, int *n, realno *a,int *lda, realno *s, realno *u,
		int *ldu, realno *v, int *ldvt, realno *work, int *lwork, int *info );	

    void dgels_(char *ch, int *m, int *n, int *nrhs, realno *a, int *lda, realno *b,
	       int *ldb, realno *work, int *lwork, int *info);
    
}
#endif // ifdef WIN32 else

#else  // ifdef USE_LAPACK

// use NAG

// default value for IFAIL variable, determines what NAG should do on failure
const int NagMatrix::DEF_IFAIL = -1; // show error message but continue

extern "C"
{
    void f01aaf_ (realno*, int*, int*, realno*, int*, realno*, int* );
    void f02abf_ (realno*, int*, int*, realno*, realno*, int*, 
		  realno*, int*);
    void f02agf_(realno*, int*, int*, realno*, realno*, realno*, int*,
		 realno*, int*, int*, int*);
    void f01qcf_(int*, int*, realno*, int*, realno*, int*);
    void f01qef_(char*, int*, int*, int*, realno* , int*, realno*, realno *,
		 int*);
    void f02wef_(int*, int*, realno*, int*, int*, realno*, int*, int*, 
		 realno*, int*, realno*, int*, realno*, int*,
		 realno* , int*);

    void f04jgf_(int *M, int *N, realno *A, int *NRA, realno *B, realno *TOL,
		 bool *SVD, realno *SIGMA, int *IRANK, realno *WORK, int *LWORK, int *IFAIL);

}

#endif  // ifdef USE_LAPACK else

#ifndef WIN32
extern "C"
{
    void dgemv_ (char*, int*, int*, realno*, realno*, int*, const realno*, int*, realno*,
		 realno*, int* );
    void dgemm_ (char*, char*, int*, int*, int*, realno*, realno*, int*, 
		 realno*, int*, realno*, realno*, int*);
}
#endif

void NagMatrix::matrix_error(const char *message) const
{
    cerror << "Error in NagMatrix library routine: "
	   << message << endl;
    abort();
}

NagMatrix::NagMatrix(unsigned int n, unsigned int m)
{
    rows = n;
    columns = m;
    data = new realno[n*m];
    own_memory = true;
}

void NagMatrix::reconstruct(unsigned int n, unsigned int m)
{
    if ((data != NULL) && own_memory)
	delete [] data;

    rows = n;
    columns = m;

    if ((n*m) > 0)
    {
	data = new realno[n*m];
	own_memory = true;
    }
    else
    {
	data = NULL;
	own_memory = false;
    }
}


NagMatrix::NagMatrix(unsigned int n, unsigned int m, realno init)
{
    rows = n;
    columns = m;
    data = new realno[n*m];
    own_memory = true;

    realno *element = data;
    register unsigned int num_elements = n * m;
    
    for (register unsigned int i = 0; i < num_elements; i++)
	*element++ = init;
}

NagMatrix::NagMatrix(unsigned int n, unsigned int m, const NagVector &v)
{
    rows = n;
    columns = m;

    assert(((n * m) == v.get_size()) || ((n == m) && (n == v.get_size())));  // enforce valid initialisation

    if ((n * m) == v.get_size())
    {
	data = new realno[n*m];
	own_memory = true;

	memcpy((void*)data, (void*)v.get_data_const(), n * m * sizeof(realno));
    }
    else
	if ((n == m) && (n == v.get_size()))
	{
	    // create matrix with values from v on the diagonal, 0 elsewhere
	    data = new realno[n*m];
	    own_memory = true;

	    clear(0);
	    for (unsigned int i = 0; i < rows; i++)
		set(i,i, v[i]);
	}
	else
	{
	    // no valid data given for initialisation
	    data = NULL;
	    own_memory = false;
	}
    
}

void NagMatrix::reconstruct(unsigned int n, unsigned int m, const NagVector &v)
{
    if ((data != NULL) && own_memory)
	delete [] data;
    
    rows = n;
    columns = m;

    assert (n*m == v.get_size());  // force same size
    
    data = new realno[n*m];
    own_memory = true;

    memcpy((void*)data, (void*)v.get_data_const(), n * m * sizeof(realno));
}

realno NagMatrix::identity_fn(unsigned int i, unsigned int j)
{
    if (i == j)
	return 1;
    return 0;
}

// initialising matrix with values generated by a function of row and column
NagMatrix::NagMatrix(unsigned int n, unsigned int m,
		     realno (*func) (unsigned int, unsigned int))
{
    rows = n;
    columns = m;
    data = new realno[n*m];
    own_memory = true;

    for (unsigned int i = 0; i < rows; i++)
	for (unsigned int j = 0; j < columns; j++)
	    set(i,j, (*func) (i,j));
}

NagMatrix::~NagMatrix()
{
    if ((data != NULL) && own_memory)
	delete [] data;
}

void NagMatrix::transpose (NagMatrix &result) const
{
    if (result.data == NULL)
	result.reconstruct(columns, rows);

    for (unsigned int i = 0; i < rows; i++)
	for (unsigned int j = 0; j < columns; j++)
	    result.set(j,i,read(i,j));
}

void NagMatrix::invert(NagMatrix &result) const
{
    if (rows != columns) 
	matrix_error(" cannot invert non-square matrix");
#ifdef USE_LAPACK
    copy(result);
    int N = (int) columns;
    int LDA = (int) rows;
    int *IPIV = new int[N];
    int INFO;
//    int ISPEC = 1; 
    //int NB = ilaenv_(&ISPEC, "dgetri", "", &N, &LDA, &minus1, &minus1);
    int NB = 64;
    int LWORK = (N * NB);
    NagVector WORK(LWORK);
    dgetrf_(&LDA, &N, result.data, &LDA, IPIV, &INFO);
    if (INFO != 0) matrix_error(" cannot factorise");
    dgetri_(&N, result.data, &LDA, IPIV, WORK.get_data(), &LWORK, &INFO);
    if (INFO != 0) matrix_error(" cannot invert matrix");
#else
    if (result.data == NULL) result.reconstruct(rows, columns);
    int IA = (int) rows;
    int N = (int) rows;
    int IX = result.no_rows();
    NagMatrix A (*this);    // make a copy because the NAG routine modifies A.
    NagVector P(rows);
    int IFAIL = DEF_IFAIL;  // return value: > 0 on failure, value on entry determines error message
    f01aaf_(A.data, &IA, &N, result.data, &IX, P.data, &IFAIL);
    if (IFAIL != 0) matrix_error(" failed to invert");
#endif
}

void NagMatrix::scale(const realno s, NagMatrix &result) const
{
    if (result.data == NULL)
	result.reconstruct(rows, columns);

    assert (result.no_rows() == rows);
    assert (result.no_columns() == columns);

    realno *data1 = data;
    realno *data2 = result.data;

    for (unsigned int i = rows * columns; i > 0 ; i--)
	*data2++ = s * *data1++;
}


void NagMatrix::clear(const realno s)
{
    realno *data1 = data;
    for (unsigned int i = rows * columns; i > 0 ; i--)
	*data1++ = s;
}

void NagMatrix::add (const NagMatrix &m2, NagMatrix &result) const
{
    if (result.data == NULL) result.reconstruct(rows, columns);
    if ((rows != m2.rows) || (columns != m2.columns) ||
	(rows != result.rows) || (columns != result.columns))
	matrix_error(" illegal Matrix to add");
    realno *data1 = data;
    realno *data2 = m2.data;
    realno *data_res = result.data;
    for (unsigned int i = rows * columns; i > 0; i--)
	*data_res++ = (*data1++) + (*data2++);
}

void NagMatrix::subtract (const NagMatrix &m2,  NagMatrix &result) const
{
    if (result.data == NULL) result.reconstruct(rows, columns);
    if ((rows != m2.rows) || (columns != m2.columns) ||
	(rows != result.rows) || (columns != result.columns))
	matrix_error(" illegal Matrix to subtract");
    realno *data1 = data;
    realno *data2 = m2.data;
    realno *data_res = result.data;
    for (unsigned int i = rows * columns; i > 0; i--)
	*data_res++ = (*data1++) - (*data2++);
}

void NagMatrix::multiply (const NagMatrix &m2,  NagMatrix &result) const
{
    if (result.data == NULL) result.reconstruct(rows, m2.columns);
    if ((rows != result.rows) || (m2.columns != result.columns)
	|| (columns != m2.rows))
	matrix_error(" error in matrix multiply");
    char transa = 'N';
    char transb = 'N';
    int M = (int) rows;
    int N = (int) m2.columns;
    int K = (int) columns;
    realno alpha = 1.0;
    realno beta = 0.0;
    
    int LDA = (int) rows;
    int LDB = (int) m2.rows;
    int LDC = (int) result.rows;
    
    realno *B = m2.data;
    realno *C = result.data;
    realno *A = data;
    dgemm_(&transa, &transb, &M, &N, &K, &alpha, A, &LDA,
	   B, &LDB, &beta, C, &LDC);
}

void NagMatrix::multiply (const NagMatrix &m2,  NagMatrix &result, 
			  char transa, char transb, realno alpha) const
{
    if (transa == 't')  transa = 'T';
    if (transb == 't')  transb = 'T';

    int M, N, K;

    if (transa == 'T')
	M = (int) columns;
    else
	M = (int) rows;

    if (transb == 'T')
	N = (int) m2.rows;
    else
	N = (int) m2.columns;
   
    if (transa == 'T')
	K = (int) rows;
    else
	K = (int) columns;
    
    if (((transb == 'T') && (K != m2.columns))
	|| ((transb != 'T') && (K != m2.rows)))
	matrix_error("bad call to matrix multiply");
    
    realno beta = 0.0;
    
    if (result.data == NULL) result.reconstruct(M, N);
    
    int LDA = (int) rows;
    int LDB = (int) m2.rows;
    int LDC = (int) result.rows;
    
    realno *B = m2.data;
    realno *C = result.data;
    realno *A = data;
    dgemm_(&transa, &transb, &M, &N, &K, &alpha, A, &LDA,
	   B, &LDB, &beta, C, &LDC);
}

void NagMatrix::multiply(const NagVector &m, NagVector &r) const
{
    if (r.get_data() == NULL) r.reconstruct(rows);
    if ((columns > m.get_size()) || (rows > r.get_size()))
	matrix_error(" vector too small \n");
    char trans = 'N';
    int M = (int) rows;
    int N = (int) columns;
    realno alpha = 1.0;
    realno beta = 0.0;
    int LDA = (int) rows;
    const realno *X = m.get_data_const();
    int INCX = 1;
    realno *Y = r.get_data();
    realno *A = data;
    int INCY = 1;
    dgemv_(&trans,&M, &N, &alpha, A, &LDA, X, &INCX, &beta, Y, &INCY);
}

void NagMatrix::multiply(const NagVector &m, NagVector &r, char trans,
			 realno alpha)
{
    if (trans == 't') trans = 'T';
    if (trans == 'n') trans = 'N';
    int new_rows = rows;
    int new_cols = columns;
    
    if (trans == 'T') 
    {
	new_rows = columns;
	new_cols = rows;
    }

    if (r.get_data() == NULL) r.reconstruct(new_rows);
    if ((new_cols > m.get_size()) || (new_rows > r.get_size()))
	matrix_error(" vector too small \n");
    
    int M = (int) rows;
    int N = (int) columns;
    realno beta = 0.0;
    int LDA = (int) rows;
    const realno *X = m.get_data_const();
    int INCX = 1;
    realno *Y = r.get_data();
    realno *A = data;
    int INCY = 1;
    dgemv_(&trans,&M, &N, &alpha, A, &LDA, X, &INCX, &beta, Y, &INCY);
}

void NagMatrix::output() const
{
    for (unsigned int i = 0; i < rows; i++)
    {
	if (rows > 1)
	{
	    // produce nice matrix bracket
	    if (i == 0)
		cinfo << " / ";
	    else
		if (i == (rows - 1))
		    cinfo << " \\ ";
		else
		    cinfo << " | ";
	}
	else
	    cinfo << " [ ";

	// write row
	for (unsigned int j = 0; j < columns; j++)
	    cinfo << " " << read(i,j) << " ";

	if (rows > 1)
	{
	    // produce nice matrix bracket
	    if (i == 0)
		cinfo << " \\ " << endl;
	    else
		if (i == (rows - 1))
		    cinfo << " / " << endl;
		else
		    cinfo << " | " << endl;
	}
	else
	    cinfo << " ] " << endl;
    }
}

void NagMatrix::get_column(unsigned int c, NagVector &result) const
{
    assert (c < columns);

    if (result.get_data() == NULL)
	result.reconstruct(rows);
    
    assert (result.get_size() >= rows);  // should be after reconstruct

    unsigned int row;
    
    for (row=0; row < rows; row++)
	result.set (row, read (row,c));
}

void NagMatrix::get_row(unsigned int r, NagVector &result) const
{
    assert (r < rows);

    if (result.get_data() == NULL)
	result.reconstruct(columns);

    assert (result.get_size() >= columns);  // should be after reconstruct

    unsigned int col;
    
    for (col=0; col < columns; col++)
	result.set (col,read (r,col));
} 

void NagMatrix::set_column(const unsigned int c, const NagVector &column)
{
    assert (c < columns);
    assert (column.get_size() <= rows);
    
    realno *pnt1 = get(0,c);
    const realno *pnt2 = &(column[column.get_size()-1]);
    const realno *pnt3 = column.get_data_const();

    while (pnt3 <= pnt2)
	*pnt1++ = *pnt3++;
}

void NagMatrix::set_row(const unsigned int r, const NagVector &row)
{
    assert (r < rows);
    assert (row.get_size() <= columns);

    realno *pnt1 = get(r,0);
    const realno *pnt2 = &(row[row.get_size()-1]);
    const realno *pnt3 = row.get_data_const();

    for (;pnt3 <= pnt2; pnt1 += rows)
	*pnt1 = *pnt3++;
}

void NagMatrix::set_block(const unsigned int row_off, const unsigned int col_off, const NagMatrix &block_m)
{
    if (((block_m.rows + row_off) > rows) ||
	(block_m.columns + col_off) > columns)
	matrix_error(" bad call to set_block");
    
    unsigned int i1 = row_off;
    for (unsigned int i2 = 0; i2 < block_m.rows; i2++)
    {
	unsigned int j1 = col_off;
	for (unsigned int j2 = 0; j2 < block_m.columns; j2++)
	    set(i1,j1++,block_m.read(i2,j2));
	
	i1++;
    }
}

void NagMatrix::get_block(const unsigned int row_top, const unsigned int row_bottom,
			  const unsigned int col_left, const unsigned int col_right,
			  NagMatrix &block) const
{
    if (block.data == NULL)
	block.reconstruct(row_bottom - row_top + 1,
			  col_right - col_left + 1);
    
    if (((block.rows + row_top) > rows) ||
	((block.columns + col_left) > columns))
	matrix_error("bad call to get_block");
    
    for (unsigned int i1 = row_top; i1 <= row_bottom; i1++)
	for (unsigned int j1 = col_left; j1 <= col_right; j1++)
	    block.set(i1 - row_top, j1 - col_left, read(i1,j1));
}


void NagMatrix::copy (NagMatrix &res) const
{
    if (res.data == NULL)
	res.reconstruct(rows, columns);
    
    assert (res.no_rows() == rows);
    assert (res.no_columns() == columns);

//    realno *pnt1 = data;
//    realno *pnt2 = get(rows-1, columns-1);
//    realno *pnt3 = res.data;
//
//    while (pnt1 <= pnt2)
//	*pnt3++ = *pnt1++;

    memcpy((void*)res.data, (void*)data, rows*columns*sizeof(realno));
}


void NagMatrix::operator= (const NagMatrix &m2)
{
    if ((data != NULL) && own_memory)
	delete [] data;

    rows = m2.rows;
    columns = m2.columns;
    
    if ((rows*columns) > 0)
    {
	data = new realno[rows*columns];
	own_memory = true;

	memcpy((void*)data, (void*)m2.data, rows * columns * sizeof(realno));
    }
    else
    {
	data = NULL;
	own_memory = false;
    }
}

void NagMatrix::square_entries (NagMatrix &res) const
{
    if (res.data == NULL) res.reconstruct(rows, columns);
    if ((rows != res.rows) || (columns != res.columns))
	matrix_error(" bad call to square_entries");
    realno *pnt1 = data;
    realno *pnt2 = res.data;
    realno *edat = data + (rows * columns);
    for (;pnt1 < edat; pnt1++)
	*pnt2++ = (*pnt1 * *pnt1);
}


void NagMatrix::pseudo_invert (NagMatrix &res) const
{
    if (res.data == NULL) res.reconstruct(columns, rows);
    if ((rows != res.columns) || (columns != res.rows))
	matrix_error(" bad call to pseudo_inverse");
    
    NagMatrix h_t(columns,rows);
    transpose(h_t);
    
    if (rows > columns)
    {
	
	NagMatrix hTh(columns, columns);
	
	h_t.multiply(*this, hTh);
	
	NagMatrix inv_hTh(columns, columns);
	hTh.invert(inv_hTh);
	
	inv_hTh.multiply(h_t, res);
    }
    else
    {
	NagMatrix hhT(rows,rows);
	multiply(h_t, hhT);
	
	NagMatrix inv_hhT(rows,rows);
	hhT.invert(inv_hhT);
	
	h_t.multiply(inv_hhT, res);
    }
    
}

// get_eigenvectors(result, evals))
//  calculates eigenvalues, eigenvectors of real symmetric matrix
//
//  on entry:
//    (*this) real symmetric matrix
//  on exit:
//     result = (v_1 ... v_n) eigenvectors
//      evals = (e_1 ... e_n) eigenvalues in ascending order


void NagMatrix::get_eigenvectors (NagMatrix &result, NagVector &evals) const
{
    if (rows != columns) matrix_error("bad call to get_eigenvectors");
    if (evals.get_data() == NULL) evals.reconstruct(rows);
    if (result.data == NULL) result.reconstruct(rows, rows);
    if ((evals.get_size() != rows) || (result.rows != rows) ||
	(result.columns != columns))
	matrix_error("bad call to get_eigenvectors");
    
#ifdef USE_LAPACK
    copy(result);
    char JOBZ = 'V';       // 'V':  Compute eigenvalues and eigenvectors
    char UPLO = 'U';       // 'U':  Upper triangle of A is stored
    int N = (int) rows;    // The order of the matrix A
    int LDA = (int) rows;
//    int ISPEC = 1;
    //int NB = ilaenv_(&ISPEC, "dsytrd", "U",  &N, &LDA, &minus1, &minus1); 
    int NB = 1;            // number of blocks; NB=1: unblocked algorithm
    int LWORK = (NB+2)*N;
    int INFO;
    NagVector WORK(LWORK);

    dsyev_(&JOBZ, &UPLO, &N, result.data, &LDA, evals.get_data(),
	   WORK.get_data(), &LWORK, &INFO);

    if (INFO != 0)
	matrix_error("could not get eigenvectors");    
#else
    int N = (int) rows;
    NagMatrix A (*this);    // make a copy because the NAG routine modifies A.
    NagVector temp(N);
    int IFAIL = DEF_IFAIL;  // return value: > 0 on failure; value on entry determines error message
    f02abf_ (A.data, &N, &N, evals.data, result.data, &N, temp.data, &IFAIL);

    if (IFAIL != 0)
	matrix_error("failed to get eigensystem");
#endif  
    
}

// get_eigensystem 
// finds the (complex) eigenvalues, eigenvectors
// of an arbitrary square non-symmetric real-valued matrix

void NagMatrix::get_eigensystem (NagMatrix &res_r,  // output: right eigenvectors, real part
				 NagMatrix &res_i,  // output: right eigenvectors, imag part
				 NagVector &evals_r,
				 NagVector &evals_i) const
{
    assert (rows == columns);    // otherwise bad call to get_eigensystem (non-square matrix)

    if (evals_r.get_data() == NULL) evals_r.reconstruct(rows);
    if (evals_i.get_data() == NULL) evals_i.reconstruct(rows);
    if (res_r.data == NULL) res_r.reconstruct(rows, rows);
    if (res_i.data == NULL) res_i.reconstruct(rows, rows);
    
    // make sure matrix dimensions are sufficient to avoid overflow...
    assert (res_r.no_rows() >= rows);
    assert (res_r.no_columns() >= rows);
    assert (res_i.no_rows() >= rows);
    assert (res_i.no_columns() >= rows);
    assert (evals_r.get_size() >= rows);
    assert (evals_i.get_size() >= rows);

#ifdef USE_LAPACK
    NagMatrix A (*this);         // make a copy because the BLAS/LAPACK routine modifies A.

    cinfo << "WARNING: NagMatrix::get_eigensystem() does not seem functional with LAPACK." << endl;

    // dgeev settings...
    int N = (int) rows;
    int LWORK = 4 * N;           // workspace size
//    if (best_work > LWORK)
//        LWORK = best_work;        // use optimal workspace size determined last time
    char JOBVL = 'N';            // left eigenvectors are not to be computed
    char JOBVR = 'V';            // right eigenvectors are to be computed
    int LDA = N;                 // leading dimension of A is N
    int LDVL = 1;                // leading dimension of left eigenvectors matrix, >= 1
    int LDVR = res_r.no_rows();  // leading dimension of right eigenvectors matrix, >= N
    
    NagVector WORK(LWORK);       // workspace

    if (WORK.get_data() == NULL)
	matrix_error("couldn't assign workspace");

    int INFO;
    
    dgeev_(&JOBVL, &JOBVR, &N, A.data, &LDA, evals_r.get_data(),
	   evals_i.get_data(), NULL, &LDVL, res_r.data, &LDVR, WORK.get_data(),
	   &LWORK, &INFO);
    
    if (INFO != 0)
	matrix_error("could not get eigenvectors");
//    else    
//	best_work = (int) WORK[1]; // save optimal workspace size


    // unpack the complex eigenvectors
    NagVector zero_v(N);
    zero_v.clear();
    NagVector tmp_v;

    for (unsigned int j = 0; j < N; j++)
    {
	if (evals_i[j] == 0)
	    res_i.set_column(j, zero_v);
	else
	{
	    res_r.get_column(j+1, tmp_v);
	    res_i.set_column(j, tmp_v);
	    tmp_v *= -1.0;
	    res_i.set_column(j+1, tmp_v);
	    
	    res_r.get_column(j, tmp_v);
	    res_r.set_column(j+1, tmp_v);
	    
	    j++;
	}
    }
    
#else
    NagMatrix A (*this);    // make a copy because the NAG routine modifies A.
    int IA = (int) rows;
    int N = (int) rows;
    int IVR = (int) res_r.no_rows();
    int IVI = (int) res_i.no_rows();
    int *INTGER = new int[N];
    int IFAIL = DEF_IFAIL;  // return value: > 0 on failure, value on entry determines error message
    f02agf_(A.data, &IA, &N, evals_r.data, evals_i.data,
	    res_r.data, &IVR, res_i.data, &IVI, INTGER, &IFAIL);
    delete [] INTGER;
#endif  
    
}

void NagMatrix::QR_factorise(NagMatrix &Q) const
{
#ifdef USE_LAPACK
    NagMatrix A (*this);    // make a copy because the NAG routine modifies A.
    int M = (int) rows;
    int N = (int) columns;
    if (Q.data == NULL) Q.reconstruct(M,M);
    int LDA = M;
    int K = N;
    int INFO;
    int NB=4;

    int LWORK =NB*N;
    NagVector WORK(LWORK);
    NagVector zeta(N);
   
    dgeqrf_(&M, &N, A.data, &LDA, zeta.get_data(), WORK.get_data(), &LWORK, &INFO );

    if (INFO != 0) matrix_error(" can't QR factorise");

    dorgqr_(&M, &N, &K, A.data, &LDA, zeta.get_data(), WORK.get_data(), &LWORK, &INFO );

    if (INFO != 0) matrix_error(" can't QR factorise");

    A.copy(Q);

#else
    NagMatrix A (*this);    // make a copy because the NAG routine modifies A.
    int M = (int) rows;
    int N = (int) columns;
    if (Q.data == NULL) Q.reconstruct(M,M);
    int LDA = M;
    int K = N;
    NagVector zeta(N);
    NagVector WORK(K);
    int IFAIL = DEF_IFAIL;  // return value: > 0 on failure, value on entry determines error message

    f01qcf_(&M, &N, A.data, &LDA, zeta.data, &IFAIL);

    IFAIL = DEF_IFAIL;      // re-set

    f01qef_("S", &M, &N, &K, A.data, &LDA, zeta.data, WORK.data, &IFAIL);
    A.copy(Q);
#endif    
}


// msqrt
// returns the positive definite square root
// of a real symmetric matrix

void NagMatrix::msqrt(NagMatrix &res)
{
    if (rows != columns)
	matrix_error("bad call to NagMatrix::msqrt");
    
    NagMatrix evecs;
    NagVector evals;
    
    get_eigenvectors(evecs, evals);
    NagMatrix lambda_sqrt(rows, rows, 0.0);
    for (unsigned int i = 0; i < rows; i++)
	*lambda_sqrt.get(i,i) = sqrt(evals[i]);
    NagMatrix evecs_t;  
    evecs.transpose(evecs_t);
    
    NagMatrix tmp;
    lambda_sqrt.multiply(evecs_t, tmp);
    evecs.multiply(tmp, res);
}


// sv_decompose
// Singular Value Decomposition
// A =  Q D P'
// where Q , P are orthogonal matrices

void NagMatrix::sv_decompose(NagMatrix &Q, NagVector &sv,
			     NagMatrix &P_t) const
{
#ifdef USE_LAPACK
    NagMatrix A (*this);    // make a copy because the NAG routine modifies A.

    cinfo << "WARNING: NagMatrix::sv_decompose() does not seem functional with LAPACK." << endl;

    int M = (int) rows;
    int N = (int) columns;
    int LDA = M;
    int LDU=M;                     
    int LDVT=N;
    int INFO;
    int min_mn = MIN(M,N);

    int LWORK=MAX(3*MIN(M,N)+MAX(M,N),5*MIN(M,N));
    NagVector WORK(LWORK);
  
    if (Q.data == NULL)Q.reconstruct(M,min_mn);             
    if (sv.get_data() == NULL) sv.reconstruct(min_mn);
    if (P_t.data == NULL) P_t.reconstruct(min_mn,N);

    dgesvd_( "A", "A", &M, &N, A.data,&LDA, sv.get_data(), Q.data, &LDU, P_t.data, &LDVT,
	                        WORK.get_data(),&LWORK, &INFO );

    if (INFO != 0) matrix_error(" can't sv_decompose");
     
    if (M == N) A.copy(Q);
    if (M > N) A.get_block(0,M-1,0, N-1, Q);
    if (M < N) A.get_block(0,M-1,0, N-1, P_t);

#else
    int M = (int) rows;
    int N = (int) columns;
    int LDA = M;
    NagMatrix A (*this);    // make a copy because the NAG routine modifies A.
    int NCOLB = 0;
    int LDB = 0;
    int WANTQ = 1;
    int min_mn = MIN(M,N);
    if (Q.data == NULL) 
	Q.reconstruct(M,min_mn);
    int LDQ = M;
    if (sv.data == NULL) sv.reconstruct(min_mn);
    int WANTP = 1;
    if (P_t.data == NULL) P_t.reconstruct(min_mn,N);
    int LDPT = N;
    NagVector WORK(MAX(N * N + 5 * (N - 1), N + 4));
    
    int IFAIL = DEF_IFAIL;  // return value: > 0 on failure, value on entry determines error message
    f02wef_(&M, &N, A.data, &LDA, &NCOLB, NULL, &LDB, &WANTQ, Q.data,
	    &LDQ, sv.data, &WANTP, P_t.data, &LDPT, WORK.data, &IFAIL);
    
    if (M == N) A.copy(Q);
    if (M > N) A.get_block(0,M-1,0, N-1, Q);
    if (M < N) A.get_block(0,M-1,0, N-1, P_t);
#endif
    
}


// ortho transform
// if flag is true (default)
// 		calculates Qt A Q
// else
//		calculates Q A Qt
//
// (where Qt is Q transform)
void NagMatrix::ortho_transform(NagMatrix &Q, NagMatrix &res, const bool flag) const
{
    NagMatrix Q_t;
    NagMatrix tmp;
    Q.transpose(Q_t);
    if (flag)
    {
	multiply(Q, tmp);
	Q_t.multiply(tmp, res);
    }
    else
    {
	multiply(Q_t, tmp);
	Q.multiply(tmp, res);
    }
}

ostream &operator<<(ostream &out_s, const NagMatrix &m)
{
    out_s << m.rows << " by " << m.columns << " matrix" << endl;
    /*  NagVector col(m.rows);
	for (int i = 0; i < m.columns; i++)
	{
	m.get_column(i,col);
	out_s << "column " << i << endl << col << endl;
	} */
    NagVector v(m);
    out_s << v;
//    m.reconstruct(m.rows, m.columns, v);
    return out_s;
}

istream &operator>>(istream &in_s, NagMatrix &m)
{
    char dummy[50];
    unsigned int r,c;
    in_s >> r >> dummy >> c >> dummy;
    
    if (m.data == NULL)
	m.reconstruct(r,c);
    else
	if ((r != m.rows) || (c != m.columns)) 
	    m.matrix_error("wrong sized matrix");
    
    /*
      NagVector col(m.rows);
      for (unsigned int i = 0; i < m.columns; i++)
      {
      in_s >> dummy >> dummy >> col;
      m.set_column(i,col);
      }*/
    
    NagVector v(m);
    in_s >> v; 
    m.reconstruct(r,c,v);
    return in_s;
}

void NagMatrix::pack_to_triangle(NagVector &res) const
{
    unsigned int N = MIN(rows, columns);

    if (res.get_data() == NULL)
	res.reconstruct(N * (N+1) / 2);
    
    if (res.get_size() != (N * (N+1) / 2))
	matrix_error("bad call to pack_to_triangle");
    
    realno *pnt1 = res.get_data();
    for (unsigned int i = 0; i < N; i++)
	for (unsigned int j = 0; j <= i; j++)
	    *pnt1++ = read(i,j);
    
}

void NagMatrix::unpack_triangle(const NagVector &dat, bool symmetric)
{
    if (data == NULL)
    {
	unsigned int N = (int)
	    (sqrt(2 * dat.get_size() + 0.25) - 0.49999);
	reconstruct(N,N);
    }
    
    unsigned int N = MIN(rows, columns);
    if (dat.get_size() != (N * (N+1) / 2))
	matrix_error("bad call to unpack_triangle");
    
    const realno *pnt1 = dat.get_data_const();
    if (symmetric)
    {
	for (unsigned int i = 0; i < N; i++)
	    for (unsigned int j = 0; j <= i; j++)
		*get(i,j) = *get(j,i) = *pnt1++;
    }
    else 
    {
	for (unsigned int i = 0; i < N; i++)
	    for (unsigned int j = 0; j <= i; j++)
	    {
		*get(j,i) = 0;
		*get(i,j) = *pnt1++;
	    }
    }
}

void NagMatrix::lower_to_full(NagMatrix &X_full)
{
    // X_full = X_lower + (X_lower)^T
    NagMatrix X_t;
    transpose(X_t);
    add(X_t, X_full);
}

void NagMatrix::scale_diagonal(realno fac)
{
    if (data == NULL)
	matrix_error("bad call to scale_diagonal");
    
    unsigned int N = MIN(rows, columns);
    for (unsigned int i = 0; i < N; i++)
	*get(i,i) *= fac;
}

realno NagMatrix::trace()
{
    realno res = 0;
    unsigned int N = MIN(rows, columns);
    for (unsigned int i = 0; i < N; i++)
	res += *get(i,i);
    
    return res;
}


void NagMatrix::read_diagonal(NagVector &diag)
{
    unsigned int N = MIN(rows, columns);
    if (diag.get_data() == NULL) diag.reconstruct(N);
    if (diag.get_size() != N)
	matrix_error("bad call to read_diagonal");
    
    for (unsigned int i = 0; i < N; i++)
	diag.set (i, read(i,i));
    
}

void NagMatrix::set_diagonal(NagVector &diag)
{
    unsigned int N = MIN(rows, columns);
    N = MIN(N, diag.get_size());
    for (unsigned int i = 0; i < N; i++)
	set(i,i,diag[i]);
    
}
realno NagMatrix::trace_A_Bt(NagMatrix &A, NagMatrix &B)
{
    if ((A.rows != B.rows) || (A.columns != B.columns))
	matrix_error("bad call to trace_A_Bt");
    
    realno res = 0.0;
    for (unsigned int i = 0; i < A.rows; i++)
	for (unsigned int j = 0; j < A.columns; j++)
	    res += A.read(i,j) * B.read(i,j);
    
    return res;
}

realno NagMatrix::trace_A_B(NagMatrix &A, NagMatrix &B)
{
    if ((A.rows != B.columns) || (A.columns != B.rows))
	matrix_error("bad call to trace_A_Bt");
    
    realno res = 0.0;
    for (unsigned int i = 0; i < A.rows; i++)
	for (unsigned int j = 0; j < A.columns; j++)
	    res += A.read(i,j) * B.read(j,i);
    
    return res;
}

// solves A * x  = b with A an M x N matrix, b is M x 1
void NagMatrix::solve_equations(NagMatrix b, NagMatrix &x) const
{
#ifdef USE_LAPACK
    int N = (int) MAX(rows, columns);
    int M = (int) b.columns;
    int INFO;

    NagMatrix A (*this);    // make a copy because the NAG routine modifies A.

    if (x.data == NULL)
	x.reconstruct(N, b.columns);

    if ((x.rows != N) || (x.columns != M) || (b.rows != N))
	matrix_error("bad call to solve_equations");
    
    int IA = (int) rows;
    int IB = (int) columns;
    int LWORK = MIN(rows, columns) + MAX((int) MIN(rows, columns), M);  // workspace size

    NagVector WORK(LWORK);
    
    dgels_( "N", &IA, &IB, &M, A.data, &IA, b.data, &N, WORK.get_data(), &LWORK, &INFO);

    if (INFO != 0)
	matrix_error(" failed to solve equation");

    b.copy(x);

#else

    NagMatrix A (*this);    // make a copy because the NAG routine modifies A.
    
    int M = (int) rows;
    int N = (int) columns;
    int NRA = (int) rows;
    
    // tolerance: correctness of the elements of A
    realno TOL = 0.000005;  //  e.g. 5e-4 means correct to about 4 significant digits
    bool SVD;               // return value: whether A is of full rank
    realno SIGMA;           // return value: the standard error
    int IRANK;              // return value: the rank of A, if SVD == false
    int IFAIL = DEF_IFAIL;  // return value: > 0 on failure, value on entry determines error message
    
    int LWORK = 4 * N;
    realno WORK[LWORK];
    
    if (x.data == NULL)
	x.reconstruct(N, b.columns);

    if ((M < N) || (x.rows != N) || (x.columns != 1) || (b.rows != M))
	matrix_error("bad call to solve_equations");

    // f04jgf also solves overdetermined systems, giving the least squares solution.
    
    f04jgf_(&M, &N, A.data, &NRA, b.data, &TOL, &SVD, &SIGMA, &IRANK, &WORK[0], &LWORK, &IFAIL);

    if (IFAIL != 0)
	matrix_error("failed call to solve_equations");

    // copy over the result from b to x
    for (unsigned int column = 0; column < b.columns; column++)
	for (unsigned int row = 0; row < N; row++)
	    x.set(row, column, b.read(row, column));

#endif
    
}


void NagMatrix::flip_horizontally()
{
    NagVector c1, c2;
    unsigned int imax = (columns / 2);
    for (unsigned int i = 0; i < imax; i++)
    {
	get_column(i, c1);
	get_column(columns - i - 1, c2);
	set_column(i, c2);
	set_column(columns - i - 1, c1);
    }
}

} // namespace ReadingPeopleTracker
