# include <cstdio>   
# include <cstdlib>  
# include <cmath>    
# include <iostream>
# include <string> 
# include <fstream>
# include "nrutil.h"
# include <ctime>
# include "matrix_tool.h"
# include "stat_tool.h"
# include "ranlib.h"




using namespace std;


/********************* calculate mean for a double vector*********************/

// Arithmetic mean of vector[1] .. vector[length_vector].
// The array is read with 1-based indices (vector[0] is never touched) and the
// entries are accumulated in ascending index order.
// With length_vector == 0 the result is the floating-point quotient 0.0 / 0.
double mean(double* vector, int length_vector){

    double total = 0;
    int idx = 1;
    while(idx <= length_vector){
        total = total + vector[idx];
        ++idx;
    }

    return total / length_vector;
}





/********************* calculate variance for a double vector*********************/
// Sample variance of vector[1] .. vector[length_vector] with divisor
// (length_vector - 1).  The array is read with 1-based indices, matching
// mean(), which supplies the centre.
// At least two entries are needed: with length_vector == 1 the divisor is 0.
double variance(double* vector, int length_vector){

    const double average = mean(vector, length_vector);

    // The accumulator has to start at zero; it was previously left
    // uninitialised, which made the returned value indeterminate.
    double sum = 0;
    for(int dii = 1; dii <= length_vector; dii++){
        const double deviation = vector[dii] - average;
        sum = sum + deviation * deviation;
    }

    return sum / (length_vector - 1);
}


/*********************** factorial calculation ***********************/

// n! as an int, built up iteratively as 2 * 3 * ... * n.
// Every n <= 1 (zero and negative values included) yields 1.
// The product is held in an int, so the result is only meaningful while n!
// fits in that type.
int factorial(int n )
{
    int product = 1;
    for(int k = 2; k <= n; ++k)
        product = k * product;
    return product;
}





/*********************** random number generator ***********************/


/*S-PLUS pseudo-random number generator
 This is based on George Marsaglia's "Super-Duper" package from about 1973.
 The generator produces a 32-bit integer whose top 31 bits are divided by 2^31 to
 produce a real number in [0; 1). The 32-bit integer is produced by a bitwise 
 exclusive-or of two 32-bit integers produced by separate generators.
 The current values of congrval and tausval are encoded in
 the vector .Random.seed , a vector of 12 integers in the range 0, ..., 63. If x
 represents .Random.seed, we have
 congrval = sum_{1, ...,6} (x_i) (2^{6(i-1)})
 tausval  = sum_{1, ...,6} (x_{i+6}) (2^{6(i-1)})
 */
// Load the generator state from a 12-entry seed vector.
// seeds[0..5] are combined into congrval and seeds[6..11] into tausval, each as
//   value = sum over i = 0..5 of seeds[...] * 2^(6*i)
// evaluated in unsigned int arithmetic (so it wraps modulo the width of
// unsigned int if the entries are larger than six bits allow).
void init1(int seeds[]){
	unsigned int ctmp=0;
	unsigned int ttmp=0;
	for(int i=0;i<6;i++){
		// 2^(6*i) as an exact integer shift (largest shift is 30) instead of
		// a truncating cast of the floating-point pow() result
		const unsigned int weight=1u<<(6*i);
		ctmp+=seeds[i]*weight;
		ttmp+=seeds[i+6]*weight;
	}
	congrval=ctmp;
	tausval=ttmp;
}

// Draw one pseudo-random double from the shared generator state.
// Each round multiplies congrval by 69069, applies two xor-shift steps to
// tausval (right by 15, then left by 17), xors the two states and keeps the
// top 31 bits of the low 32-bit result.  A zero draw is rejected and the
// round is repeated, so the returned value res / 2^31 has res in
// 1 .. 2^31 - 1.
double s_xuni(void)
{
	const unsigned int multiplier = 69069;
	unsigned int draw;
	for(;;){
		congrval *= multiplier;
		tausval ^= tausval >> 15;
		tausval ^= tausval << 17;
		const unsigned int mixed = tausval ^ congrval;
		draw = (mixed >> 1) & 0x7FFFFFFFu;
		if(draw != 0)
			break;
	}
	return draw / 2147483648.;
}

// Fill Y[0..11] with the built-in default seed vector and then overwrite
// Y[3], Y[4], Y[5] with digits derived from SEED.
//
// ((SEED - 1) mod 1024) is placed in bits 22..31 of a 32-bit word; Y[5]
// receives bits 30..31, Y[4] bits 24..29 and Y[3] bits 18..23 of that word.
// Y must have room for 12 ints.
//
// All of the bit arithmetic is done in unsigned int: the previous int * int
// products (for example 512 * 2^22 or 3 * 2^30) exceeded INT_MAX, which is
// undefined behaviour for signed integers.
void setSEED(int Y [],int SEED){
	const int defaults[12]={21, 14, 49,0,1,2, 32, 22, 36, 23, 28,  3};
	for(int i=0;i<12;i++){
		Y[i]=defaults[i];
	}

	// widen before subtracting so SEED - 1 cannot overflow; a negative
	// remainder wraps modulo 2^32 exactly like the old int -> unsigned
	// conversion did
	const long long slot=(static_cast<long long>(SEED)-1)%1024;
	unsigned int tmp=static_cast<unsigned int>(slot)<<22;

	// peel off the digits from the most significant end
	for(int i=5;i>=3;i--){
		const unsigned int digit=tmp>>(6*i);
		Y[i]=static_cast<int>(digit);
		tmp-=digit<<(6*i);
	}
}

// Initialise the generator state through init1().
// If SEED.TXT can be opened and yields 12 integers that all lie in 0..63,
// those values are used.  In every other case (file missing, too short,
// non-numeric content, value out of range) the seed vector is produced by
// setSEED() from a rand() draw.
void set_seed(void){
    int Y[12];
    bool usable=false;
    void init1(int seeds[]);
    void setSEED(int Y[], int);

    FILE *fp=fopen("SEED.TXT","r");
    if(fp){
        usable=true;
        for(int i=0;i<12;i++){
            // fscanf returns the number of converted items; anything other
            // than 1 means Y[i] was not filled in and must not be read
            if(fscanf(fp,"%d",&Y[i])!=1){
                usable=false;
                printf("Invalid SEED.TXT! Expected 12 integers\n");
                break;
            }
            // upper bound is 63, matching the message below (the old test
            // let 64 through)
            if(Y[i]<0||Y[i]>63) {
                usable=false;
                printf("Invalid Value! The range is between 0 and 63\n");
                break;
            }
        }
        fclose(fp);
    }

    // fall back to the default seeds when the file is absent or unusable
    if(!usable){
        setSEED(Y,rand());
    }
    init1(Y);
}

//output seed to SEED.TXT
// Write the current generator state to SEED.TXT as 12 integers:
// the base-64 digits of congrval (least significant first) followed by those
// of tausval.  The sixth digit of each word holds whatever remains above bit
// 29.  This is the inverse of the packing done in init1().
// If the file cannot be opened, a message goes to stderr and nothing is
// written.
void outputSeed(void){
	int Y[12];
	unsigned int ctmp=congrval, ttmp=tausval;

	// digits are extracted with unsigned shifts; the former int * int
	// products could exceed INT_MAX for the top digit
	for(int i=5;i>=0;i--){
		const unsigned int digit=ctmp>>(6*i);
		Y[i]=static_cast<int>(digit);
		ctmp-=digit<<(6*i);
	}
	for(int i=5;i>=0;i--){
		const unsigned int digit=ttmp>>(6*i);
		Y[i+6]=static_cast<int>(digit);
		ttmp-=digit<<(6*i);
	}

	FILE *fout=fopen("SEED.TXT","w");
	if(!fout){
		fprintf(stderr,"Cannot open SEED.TXT for writing\n");
		return;
	}
	for(int i=0;i<12;i++){
		fprintf(fout," %d   ",Y[i]);
	}
	fclose(fout);
}



/********************* estimating VQ *********************/



// Computes VQ = (R*B)' * (R*X'*X*R')^(-1) * (R*B).
//
//   X  = [1 | covariate], the num_subject x (num_covariate+1) design matrix
//   B  = (X'X)^(-1) * X' * response, (num_covariate+1) x num_response
//   R  = waldC_trans, wald_df x (num_covariate+1)
//
// response is num_subject x num_response and covariate is
// num_subject x num_covariate.  Every matrix is indexed from 1 and obtained
// from dmatrix().  VQ (num_response x num_response) is supplied by the caller
// and is passed as the last argument of the final matrixmultiply call, the
// position used for the product throughout this file.
//
// NOTE(review): the middle factor inverts R*(X'X)*R' with the un-inverted
// X'X, as the original comment also states, whereas test_stat() works with
// R*(X'X)^(-1)*R'.  Confirm that this difference is intended.
void covariance_VQ(double** response, double** covariate, 
                   int num_subject, int num_response, int num_covariate, double** VQ,
                   double** waldC_trans, int wald_df){

    const int num_coef = num_covariate + 1; // intercept plus covariates


    ////////// design matrix set up /////////////
    double** design_matrix = dmatrix(1, num_subject, 1, num_coef);

    for(int dii = 1; dii <= num_subject; dii++)
        for(int djj = 1; djj <= num_coef; djj++)
            design_matrix[dii][djj] = (djj == 1) ? 1 : covariate[dii][djj-1]; // intercept, then covariates


    ////////// wald testing C matrix re-index /////////////
    double** R = dmatrix(1, wald_df, 1, num_coef);
    double** R_trans = dmatrix(1, num_coef, 1, wald_df);

    for(int dii = 1; dii <= wald_df; dii++)
        for(int djj = 1; djj <= num_coef; djj++)
        {
            R[dii][djj] = waldC_trans[dii][djj];
            R_trans[djj][dii] = waldC_trans[dii][djj];
        }


    //////////// beta estimation /////////////
    double** beta = dmatrix(1, num_coef, 1, num_response);
    double** tx0x = dmatrix(1, num_coef, 1, num_coef); // X'X, kept un-inverted for the VQ step
    double** t_design_matrix = dmatrix(1, num_coef, 1, num_subject);

    matrixtranspose(design_matrix, num_subject, num_coef, t_design_matrix);
    matrixmultiply(t_design_matrix, num_coef, num_subject, design_matrix, num_subject, num_coef, tx0x);

    double** tx0y = dmatrix(1, num_coef, 1, num_response); // X'Y
    matrixmultiply(t_design_matrix, num_coef, num_subject, response, num_subject, num_response, tx0y);

    // invv() works in place, so invert a copy and keep tx0x intact
    double** tx0x_inv = dmatrix(1, num_coef, 1, num_coef);

    for(int dii = 1; dii <= num_coef; dii++)
        for(int djj = 1; djj <= num_coef; djj++)
            tx0x_inv[dii][djj] = tx0x[dii][djj];

    invv(tx0x_inv, num_coef);
    matrixmultiply(tx0x_inv, num_coef, num_coef, tx0y, num_coef, num_response, beta);


    ///////////// VQ estimation = B'* R'* (R*X'*X*R')^(-1)* R * B //////////
    // (the transpose of beta that used to be built here was never read and
    //  has been dropped)

    double** temp = dmatrix(1, wald_df, 1, num_coef); // R*X'*X
    matrixmultiply(R, wald_df, num_coef, tx0x, num_coef, num_coef, temp);

    double** temp2 = dmatrix(1, wald_df, 1, wald_df); // R*X'*X*R'
    matrixmultiply(temp, wald_df, num_coef, R_trans, num_coef, wald_df, temp2);
    invv(temp2, wald_df); // temp2 = (R*X'*X*R')^(-1)

    double** temp3 = dmatrix(1, wald_df, 1, num_response); // R*B
    matrixmultiply(R, wald_df, num_coef, beta, num_coef, num_response, temp3);

    double** temp3_trans = dmatrix(1, num_response, 1, wald_df); // B'*R'
    matrixtranspose(temp3, wald_df, num_response, temp3_trans);

    double** temp4 = dmatrix(1, num_response, 1, wald_df); // B'*R'*(R*X'*X*R')^(-1)
    matrixmultiply(temp3_trans, num_response, wald_df, temp2, wald_df, wald_df, temp4);

    matrixmultiply(temp4, num_response, wald_df, temp3, wald_df, num_response, VQ);


    //////////////// free memory ////////////////
    free_dmatrix(temp, 1, wald_df, 1, num_coef);
    free_dmatrix(temp2, 1, wald_df, 1, wald_df);
    free_dmatrix(temp3, 1, wald_df, 1, num_response);
    free_dmatrix(temp3_trans, 1, num_response, 1, wald_df);
    free_dmatrix(temp4, 1, num_response, 1, wald_df);

    free_dmatrix(design_matrix, 1, num_subject, 1, num_coef);
    free_dmatrix(beta, 1, num_coef, 1, num_response);

    free_dmatrix(R, 1, wald_df, 1, num_coef);
    free_dmatrix(R_trans, 1, num_coef, 1, wald_df);

    free_dmatrix(tx0x, 1, num_coef, 1, num_coef);
    free_dmatrix(tx0x_inv, 1, num_coef, 1, num_coef);
    free_dmatrix(t_design_matrix, 1, num_coef, 1, num_subject);
    free_dmatrix(tx0y, 1, num_coef, 1, num_response);
}
















/********************* estimating VR *********************/


// Shrinkage-type estimate VR of the residual covariance matrix.
// Steps carried out below:
//   1. B  = (X'X)^(-1) X' * response, with X = [1 | covariate]
//   2. E  = response - X*B,  CE = E'E / num_subject
//   3. mu_E = trace(CE) / num_response
//   4. rho  = sum_i tr((e_i e_i' - CE)^2)
//             / (tr((CE - mu_E*I)^2) * num_subject^2), capped above at 1
//   5. VR   = rho * mu_E * I + (1 - rho) * CE
// All matrices are indexed from 1.  VR (num_response x num_response) is
// supplied by the caller and every entry of it is assigned.
void covariance_VR(double** response, double** covariate, 
                   int num_subject, int num_response, int num_covariate, double** VR){

    const int n_coef = num_covariate + 1; // intercept plus covariates
    int row, col;


    // ---- step 1: design matrix and least-squares coefficients ----
    double** X = dmatrix(1, num_subject, 1, n_coef);

    for(row = 1; row <= num_subject; row++)
        for(col = 1; col <= n_coef; col++)
            X[row][col] = (col == 1) ? 1 : covariate[row][col-1];

    double** coef = dmatrix(1, n_coef, 1, num_response);
    double** XtX = dmatrix(1, n_coef, 1, n_coef);
    double** Xt = dmatrix(1, n_coef, 1, num_subject);

    matrixtranspose(X, num_subject, n_coef, Xt);
    matrixmultiply(Xt, n_coef, num_subject, X, num_subject, n_coef, XtX);

    double** XtY = dmatrix(1, n_coef, 1, num_response);
    matrixmultiply(Xt, n_coef, num_subject, response, num_subject, num_response, XtY);

    invv(XtX, n_coef); // XtX now holds its own inverse
    matrixmultiply(XtX, n_coef, n_coef, XtY, n_coef, num_response, coef);

    free_dmatrix(XtX, 1, n_coef, 1, n_coef);
    free_dmatrix(Xt, 1, n_coef, 1, num_subject);
    free_dmatrix(XtY, 1, n_coef, 1, num_response);


    // ---- step 2: residuals and their sample covariance ----
    double** residual = dmatrix(1, num_subject, 1, num_response);
    double** fitted = dmatrix(1, num_subject, 1, num_response);

    matrixmultiply(X, num_subject, n_coef, coef, n_coef, num_response, fitted);

    for(row = 1; row <= num_subject; row++)
        for(col = 1; col <= num_response; col++)
            residual[row][col] = response[row][col] - fitted[row][col];

    free_dmatrix(fitted, 1, num_subject, 1, num_response);
    free_dmatrix(X, 1, num_subject, 1, n_coef);
    free_dmatrix(coef, 1, n_coef, 1, num_response);

    double** CE = dmatrix(1, num_response, 1, num_response);

    for(row = 1; row <= num_response; row++)
        for(col = 1; col <= num_response; col++)
        {
            double cross = 0;
            for(int subj = 1; subj <= num_subject; subj++)
                cross = cross + residual[subj][row] * residual[subj][col];
            CE[row][col] = cross / num_subject;
        }


    // ---- step 3: average diagonal entry of CE ----
    double mu_E = 0;
    for(row = 1; row <= num_response; row++)
        mu_E = mu_E + CE[row][row];
    mu_E = mu_E / num_response;


    // ---- step 4a: denominator tr((CE - mu_E*I)^2) ----
    double** centred = dmatrix(1, num_response, 1, num_response);

    for(int a = 1; a <= num_response; a++)
        for(int b = 1; b <= num_response; b++)
            centred[a][b] = (a == b) ? CE[a][b] - mu_E : CE[a][b];

    double** centred_sq = dmatrix(1, num_response, 1, num_response);
    matrixmultiply(centred, num_response, num_response, centred, num_response, num_response, centred_sq);

    double denomenator = 0;
    for(row = 1; row <= num_response; row++)
        denomenator = denomenator + centred_sq[row][row];

    free_dmatrix(centred, 1, num_response, 1, num_response);
    free_dmatrix(centred_sq, 1, num_response, 1, num_response);


    // ---- step 4b: numerator, summed subject by subject ----
    double total = 0;

    for(row = 1; row <= num_subject; row++)
    {
        // outer product of this subject's residual vector
        double** outer = dmatrix(1, num_response, 1, num_response);
        for(int a = 1; a <= num_response; a++)
            for(int b = 1; b <= num_response; b++)
                outer[a][b] = residual[row][a] * residual[row][b];

        // deviation from the pooled covariance
        double** dev = dmatrix(1, num_response, 1, num_response);
        for(int a = 1; a <= num_response; a++)
            for(int b = 1; b <= num_response; b++)
                dev[a][b] = outer[a][b] - CE[a][b];

        double** dev_sq = dmatrix(1, num_response, 1, num_response);
        matrixmultiply(dev, num_response, num_response, dev, num_response, num_response, dev_sq);

        double trace = 0;
        for(int a = 1; a <= num_response; a++)
            trace = trace + dev_sq[a][a];

        total = total + trace;

        free_dmatrix(outer, 1, num_response, 1, num_response);
        free_dmatrix(dev, 1, num_response, 1, num_response);
        free_dmatrix(dev_sq, 1, num_response, 1, num_response);
    }

    double rho = total / ( denomenator * num_subject * num_subject );
    if(rho > 1)
        rho = 1;


    // ---- step 5: blend CE with the scaled identity ----
    for(row = 1; row <= num_response; row++)
        for(col = 1; col <= num_response; col++)
            VR[row][col] = (row == col)
                ? rho * mu_E + (1-rho) * CE[row][col]
                : (1-rho) * CE[row][col];

    free_dmatrix(residual, 1, num_subject, 1, num_response);
    free_dmatrix(CE, 1, num_response, 1, num_response);
}






/********************* calculating testing statistics T *********************/


// Wald-type test statistic  trace( (R*B)' * [R * Cov(B) * R']^(-1) * (R*B) ).
//
//   X  = [1 | covariate], num_subject x (num_covariate+1)
//   B  = (X'X)^(-1) * X' * w0y, (num_covariate+1) x projection_dim
//   R  = waldC_trans, wald_df x (num_covariate+1)
//   Cov(B) = M * Omega * M' with M = (X'X)^(-1) X' and Omega diagonal,
//            Omega_ii = sum_j epsilon_tilda[i][j]^2 / (1 - H_ii)^2,
//            H = X * M, and
//            epsilon_tilda = (w0y - X*B) + X * A * (R*A)^(-1) * R*B,
//            A = (X'X)^(-1) * R'.
//
// w0y is num_subject x projection_dim and covariate is
// num_subject x num_covariate.  All matrices are indexed from 1 and obtained
// from dmatrix(); every temporary is released before returning.
double test_stat(double** w0y, double** covariate, int num_subject, int num_covariate,double** waldC_trans, int wald_df, int projection_dim){

    const int num_coef = num_covariate + 1; // intercept plus covariates


    ////////// design matrix set up /////////////
    double** design_matrix = dmatrix(1, num_subject, 1, num_coef);

    for(int dii = 1; dii <= num_subject; dii++)
        for(int djj = 1; djj <= num_coef; djj++)
            design_matrix[dii][djj] = (djj == 1) ? 1 : covariate[dii][djj-1]; // intercept, then covariates

    double** waldC = dmatrix(1, num_coef, 1, wald_df);
    matrixtranspose(waldC_trans, wald_df, num_coef, waldC); // R'


    //////////// beta estimation /////////////
    double** beta = dmatrix(1, num_coef, 1, projection_dim);
    double** tx0x = dmatrix(1, num_coef, 1, num_coef); // X'X, inverted in place below
    double** t_design_matrix = dmatrix(1, num_coef, 1, num_subject);

    matrixtranspose(design_matrix, num_subject, num_coef, t_design_matrix);
    matrixmultiply(t_design_matrix, num_coef, num_subject, design_matrix, num_subject, num_coef, tx0x);

    double** tx0y = dmatrix(1, num_coef, 1, projection_dim);
    matrixmultiply(t_design_matrix, num_coef, num_subject, w0y, num_subject, projection_dim, tx0y);

    invv(tx0x, num_coef); // tx0x = (X'X)^(-1) from here on

    // The column count must be projection_dim: beta is allocated and read
    // with that many columns.  It was previously hard-coded to 1, which left
    // columns 2..projection_dim of beta unwritten.
    matrixmultiply(tx0x, num_coef, num_coef, tx0y, num_coef, projection_dim, beta);

    double** Cbeta = dmatrix(1, wald_df, 1, projection_dim);
    matrixmultiply(waldC_trans, wald_df, num_coef, beta, num_coef, projection_dim, Cbeta); // R*B


    //////////////// epsilon_tilda estimation ////////////////
    double** A = dmatrix(1, num_coef, 1, wald_df); // (X'X)^(-1) * R'
    matrixmultiply(tx0x, num_coef, num_coef, waldC, num_coef, wald_df, A);

    double** Ra = dmatrix(1, wald_df, 1, wald_df); // R * A
    matrixmultiply(waldC_trans, wald_df, num_coef, A, num_coef, wald_df, Ra);

    double** Ra_inv = dmatrix(1, wald_df, 1, wald_df);

    for(int dii = 1; dii <= wald_df; dii++)
        for(int djj = 1; djj <= wald_df; djj++)
            Ra_inv[dii][djj] = Ra[dii][djj];

    invv(Ra_inv, wald_df); // (R*A)^(-1)

    double** temp_a = dmatrix(1, num_coef, 1, wald_df); // A * (R*A)^(-1)
    double** temp2_a = dmatrix(1, num_coef, 1, projection_dim); // temp_a * R*B

    matrixmultiply(A, num_coef, wald_df, Ra_inv, wald_df, wald_df, temp_a);
    matrixmultiply(temp_a, num_coef, wald_df, Cbeta, wald_df, projection_dim, temp2_a);

    // (beta - temp2_a used to be stored in a matrix that was never read;
    //  that dead computation has been removed)

    double** residual = dmatrix(1, num_subject, 1, projection_dim);
    double** xbeta = dmatrix(1, num_subject, 1, projection_dim);

    matrixmultiply(design_matrix, num_subject, num_coef, beta, num_coef, projection_dim, xbeta);

    for(int djj = 1; djj <= projection_dim; djj++)
        for(int dii = 1; dii <= num_subject; dii++)
            residual[dii][djj] = w0y[dii][djj] - xbeta[dii][djj];

    double** temp3_a = dmatrix(1, num_subject, 1, projection_dim); // X * temp2_a
    matrixmultiply(design_matrix, num_subject, num_coef, temp2_a, num_coef, projection_dim, temp3_a);

    double** epsilon_tilda = dmatrix(1, num_subject, 1, projection_dim);

    for(int djj = 1; djj <= projection_dim; djj++)
        for(int dii = 1; dii <= num_subject; dii++)
            epsilon_tilda[dii][djj] = residual[dii][djj] + temp3_a[dii][djj];


    ////////////////// estimating covariance variance for beta ///////////////
    double** H = dmatrix(1, num_subject, 1, num_subject); // X * (X'X)^(-1) * X'
    double** temp = dmatrix(1, num_coef, 1, num_subject); // (X'X)^(-1) * X'

    matrixmultiply(tx0x, num_coef, num_coef, t_design_matrix, num_coef, num_subject, temp);
    matrixmultiply(design_matrix, num_subject, num_coef, temp, num_coef, num_subject, H);

    double** temp_trans = dmatrix(1, num_subject, 1, num_coef); // ((X'X)^(-1) * X')'
    matrixtranspose(temp, num_coef, num_subject, temp_trans);

    // squared length of each subject's epsilon_tilda row
    double* epsilon_tilda_sq = dvector(1, num_subject);

    for(int dii = 1; dii <= num_subject; dii++)
    {
        epsilon_tilda_sq[dii] = 0;

        for(int djj = 1; djj <= projection_dim; djj++)
            epsilon_tilda_sq[dii] += epsilon_tilda[dii][djj] * epsilon_tilda[dii][djj];
    }

    // diagonal weight matrix; the divisor is (1 - H_ii)^2
    double** omega_tilda = dmatrix(1, num_subject, 1, num_subject);

    for(int dii = 1; dii <= num_subject; dii++)
        for(int djj = 1; djj <= num_subject; djj++)
        {
            if(dii == djj)
                omega_tilda[dii][djj] = epsilon_tilda_sq[dii] / ( (1-H[dii][dii]) * (1-H[dii][dii]));
            else
                omega_tilda[dii][djj] = 0;
        }

    free_dvector(epsilon_tilda_sq, 1, num_subject);

    double** covB = dmatrix(1, num_coef, 1, num_coef); // temp * omega_tilda * temp'
    double** temp_aa = dmatrix(1, num_coef, 1, num_subject); // temp * omega_tilda

    matrixmultiply(temp, num_coef, num_subject, omega_tilda, num_subject, num_subject, temp_aa);
    matrixmultiply(temp_aa, num_coef, num_subject, temp_trans, num_subject, num_coef, covB);


    ///////////////////// estimating testing statistics T /////////////
    double** t = dmatrix(1, projection_dim, 1, projection_dim);
    double** covariance_Cbeta_inv = dmatrix(1, wald_df, 1, wald_df);
    double** temp1 = dmatrix(1, wald_df, 1, num_coef);

    matrixmultiply(waldC_trans, wald_df, num_coef, covB,
                   num_coef, num_coef, temp1); // R * Cov(B)

    matrixmultiply(temp1, wald_df, num_coef, waldC, num_coef, wald_df, covariance_Cbeta_inv); // R * Cov(B) * R'

    invv(covariance_Cbeta_inv, wald_df); // now the inverse

    double** Cbeta_trans = dmatrix(1, projection_dim, 1, wald_df);
    matrixtranspose(Cbeta, wald_df, projection_dim, Cbeta_trans); // (R*B)'

    double** temp2 = dmatrix(1, projection_dim, 1, wald_df); // (R*B)' * [R Cov(B) R']^(-1)
    matrixmultiply(Cbeta_trans, projection_dim, wald_df, covariance_Cbeta_inv, wald_df, wald_df, temp2);

    matrixmultiply(temp2, projection_dim, wald_df, Cbeta, wald_df, projection_dim, t);

    // the statistic is the trace of t
    double testing_stat = 0;

    for(int dii = 1; dii <= projection_dim; dii++)
        testing_stat += t[dii][dii];


    //////////////// free memory ////////////////
    free_dmatrix(temp2, 1, projection_dim, 1, wald_df);
    free_dmatrix(Cbeta, 1, wald_df, 1, projection_dim);
    free_dmatrix(temp1, 1, wald_df, 1, num_coef);
    free_dmatrix(beta, 1, num_coef, 1, projection_dim);
    free_dmatrix(t_design_matrix, 1, num_coef, 1, num_subject);
    free_dmatrix(tx0y, 1, num_coef, 1, projection_dim);
    free_dmatrix(design_matrix, 1, num_subject, 1, num_coef);
    free_dmatrix(residual, 1, num_subject, 1, projection_dim);
    free_dmatrix(tx0x, 1, num_coef, 1, num_coef);
    free_dmatrix(xbeta, 1, num_subject, 1, projection_dim);
    free_dmatrix(covB, 1, num_coef, 1, num_coef);
    free_dmatrix(covariance_Cbeta_inv, 1, wald_df, 1, wald_df);
    free_dmatrix(waldC, 1, num_coef, 1, wald_df);
    free_dmatrix(Cbeta_trans, 1, projection_dim, 1, wald_df);

    free_dmatrix(epsilon_tilda, 1, num_subject, 1, projection_dim);
    free_dmatrix(A, 1, num_coef, 1, wald_df);
    free_dmatrix(Ra, 1, wald_df, 1, wald_df);
    free_dmatrix(Ra_inv, 1, wald_df, 1, wald_df);

    free_dmatrix(temp, 1, num_coef, 1, num_subject);
    free_dmatrix(temp_trans, 1, num_subject, 1, num_coef);
    free_dmatrix(temp_aa, 1, num_coef, 1, num_subject);
    free_dmatrix(H, 1, num_subject, 1, num_subject);
    free_dmatrix(omega_tilda, 1, num_subject, 1, num_subject);

    free_dmatrix(temp_a, 1, num_coef, 1, wald_df);
    free_dmatrix(temp2_a, 1, num_coef, 1, projection_dim);
    free_dmatrix(temp3_a, 1, num_subject, 1, projection_dim);
    free_dmatrix(t, 1, projection_dim, 1, projection_dim);

    return testing_stat;
}



/********************* calculating testing statistics under null hypothesis from wild bootstrap *********************/


double test_stat_wildb(double** w0y, double** covariate, int num_subject, int num_covariate,double** waldC_trans, int wald_df, double* epsilon_star){
    
    double** design_matrix = dmatrix(1,num_subject,1, num_covariate+1);
 
    ////////// design matrix set up /////////////
    
    for(int dii = 1; dii <= num_subject; dii++)
        for(int djj = 1; djj <= num_covariate+1; djj++)
        {
            if(djj == 1)
                design_matrix[dii][djj] = 1; //intercept
            if(djj >= 2)
                design_matrix[dii][djj] = covariate[dii][djj-1]; //other covariate
        }
    
    
    ////////// wald testing C matrix re-index /////////////
    
    double** R = dmatrix(1, wald_df, 1, num_covariate + 1); 
    double** R_trans = dmatrix(1, num_covariate + 1, 1, wald_df);
    
    for(int dii = 1; dii <= wald_df; dii++)
        for(int djj = 1; djj <= num_covariate + 1; djj++)
        {
            R[dii][djj] = waldC_trans[dii][djj];
            R_trans[djj][dii] = waldC_trans[dii][djj];
        }
    
    
    
    
    //////////// beta_hat and epsilon_hat estimation /////////////
    
    double** beta_hat = dmatrix(1,num_covariate+1,1,1);
    double** tx0x = dmatrix(1,num_covariate+1,1,num_covariate+1); //t_design_matrix %*% design_matrix 
    double** t_design_matrix = dmatrix(1,num_covariate+1,1,num_subject);
    
    
    matrixtranspose(design_matrix, num_subject, num_covariate+1, t_design_matrix);
    matrixmultiply(t_design_matrix,num_covariate+1, num_subject, design_matrix, num_subject, num_covariate+1, tx0x); 
    
    
    double** tx0y = dmatrix(1,num_covariate+1,1,1);  
    matrixmultiply(t_design_matrix, num_covariate+1, num_subject, w0y, num_subject, 1, tx0y); 
    
    double** tx0x_inv = dmatrix(1,num_covariate+1,1,num_covariate+1); //inv(t_design_matrix %*% design_matrix)
    
    for(int dii = 1; dii <= num_covariate+1; dii++)
        for(int djj = 1; djj <= num_covariate + 1; djj++)
            tx0x_inv[dii][djj] = tx0x[dii][djj];
    
    invv(tx0x_inv,num_covariate+1); // tx0x^(-1)
    
    
    matrixmultiply(tx0x_inv, num_covariate+1, num_covariate+1, tx0y, num_covariate+1, 1, beta_hat);
    
    double** epsilon_hat = dmatrix(1, num_subject, 1, 1);
    double** xbeta = dmatrix(1, num_subject, 1, 1);
    
    matrixmultiply(design_matrix, num_subject, num_covariate+1, beta_hat, num_covariate+1, 1, xbeta);
    
    for(int dii = 1; dii <= num_subject; dii++)
        epsilon_hat[dii][1] = w0y[dii][1] - xbeta[dii][1];
    
    free_dmatrix(xbeta, 1, num_subject, 1, 1);

   
    
    
    
    ///////////// some basic matrix caculation set up //////////
    
    double** A = dmatrix(1, num_covariate + 1, 1, wald_df); // tx0x_inv * R_trans
    matrixmultiply(tx0x_inv, num_covariate+1, num_covariate+1, R_trans, num_covariate+1, wald_df, A);
    
    double** Rx = dmatrix(1,num_subject, 1, wald_df); // design_matrix * A
    matrixmultiply(design_matrix, num_subject, num_covariate + 1, A, num_covariate + 1,wald_df, Rx );
    
    double** Rx_trans = dmatrix(1, wald_df, 1, num_subject); // Rx transpose
    matrixtranspose(Rx, num_subject, wald_df, Rx_trans);    
   
    
    
    double** Ra = dmatrix(1, wald_df, 1, wald_df); // R * A
    matrixmultiply(R, wald_df, num_covariate + 1, A, num_covariate + 1, wald_df, Ra);
    
    double** Ra_inv = dmatrix(1, wald_df, 1, wald_df);
    
    for(int dii = 1; dii <= wald_df; dii++)
        for(int djj = 1; djj <= wald_df; djj++)
            Ra_inv[dii][djj] = Ra[dii][djj];
    
    invv(Ra_inv,wald_df); // RA_inv
    
    
    
    double** H = dmatrix(1, num_subject, 1, num_subject); // design_matrix * tx0x_inv * t_design_matrix
    
    double** temp = dmatrix(1, num_covariate + 1, 1, num_subject); // tx0x_inv * t_design_matrix
    
    matrixmultiply(tx0x_inv, num_covariate + 1, num_covariate + 1, t_design_matrix, num_covariate + 1, num_subject, temp);
    
    matrixmultiply(design_matrix, num_subject, num_covariate + 1, temp, num_covariate + 1, num_subject, H);
    
    free_dmatrix(temp, 1, num_covariate + 1, 1, num_subject);

    
    
    double** R_beta_hat = dmatrix(1, wald_df, 1, 1); // R * beta_hat
    matrixmultiply(R, wald_df, num_covariate+1, beta_hat, num_covariate + 1, 1, R_beta_hat);
    
    
    
    
    ///////////// beta_tilda_0 and epsilon_tilda estimation //////////
    
    double** beta_tilda_0 = dmatrix(1, num_covariate + 1,1,1);
    
    double** temp_a = dmatrix(1, num_covariate + 1, 1, wald_df); // A * RA_inv
    double** temp2_a = dmatrix(1, num_covariate + 1, 1, 1); // temp_a * R_beta_hat
    
    matrixmultiply(A, num_covariate + 1, wald_df, Ra_inv, wald_df, wald_df, temp_a);
    matrixmultiply(temp_a, num_covariate + 1, wald_df, R_beta_hat, wald_df, 1, temp2_a);
    
    for(int dii = 1; dii <= num_covariate +1 ; dii++)
        beta_tilda_0[dii][1] = beta_hat[dii][1] - temp2_a[dii][1];
    
    
    double** temp3_a = dmatrix(1, num_subject, 1, 1); //design_matrix * temp2_a
    matrixmultiply(design_matrix, num_subject, num_covariate + 1, temp2_a, num_covariate + 1, 1, temp3_a);
    
    double** epsilon_tilda = dmatrix(1, num_subject, 1, 1);
    
    for(int dii = 1; dii <= num_subject ; dii++)
        epsilon_tilda[dii][1] = epsilon_hat[dii][1] + temp3_a[dii][1];
    
    free_dmatrix(temp_a,1, num_covariate + 1, 1, wald_df );
    free_dmatrix(temp2_a,1, num_covariate + 1, 1, 1 );
    free_dmatrix(temp3_a,1, num_subject, 1, 1);
    
    
    
    ///////////// omega_tilda and sq_omega_tilda estimation //////////

    double** omega_tilda = dmatrix(1, num_subject, 1, num_subject);
    double** sq_omega_tilda = dmatrix(1, num_subject, 1, num_subject);
    
    for(int dii = 1; dii <=num_subject; dii++)
        for(int djj = 1; djj <= num_subject; djj++)
        {
            if(dii == djj)
            {
                
                omega_tilda[dii][djj] = epsilon_tilda[dii][1] * epsilon_tilda[dii][1] / ( (1-H[dii][dii]) * (1-H[dii][dii]));
                sq_omega_tilda[dii][djj] = epsilon_tilda[dii][1] / (1-H[dii][dii]);
                
            }
            
            if(dii != djj)
            {
                //original version of wild bootstrap
                omega_tilda[dii][djj] = 0;
                sq_omega_tilda[dii][djj] = 0;
                
                        
                
            }
        }
            

    ///////////// Y_star estimation //////////
    
    double** Y_star = dmatrix(1, num_subject, 1, 1);
        
    double** temp_b = dmatrix(1, num_subject, 1, 1); // sq_omega_tilda * epsilon_star
    double** temp2_b = dmatrix(1, num_subject, 1,1); //design_matrix * beta_tilda_0
    
    for(int dii = 1; dii <= num_subject; dii++)
       temp_b[dii][1] = epsilon_tilda[dii][1] * epsilon_star[dii] / (1-H[dii][dii]);
    
    matrixmultiply(design_matrix, num_subject, num_covariate + 1, beta_tilda_0, num_covariate + 1, 1, temp2_b);
    
    for(int dii = 1; dii <= num_subject; dii++)
        Y_star[dii][1] = temp_b[dii][1] + temp2_b[dii][1]; 
    
    free_dmatrix(temp_b, 1, num_subject, 1, 1);
    free_dmatrix(temp2_b, 1, num_subject, 1, 1);
    
    
    ///////////// epsilon_hat_star estimation //////////  
    
    double** epsilon_hat_star = dmatrix(1, num_subject, 1, 1);
   
    double** epsilon_star_matrix = dmatrix(1, num_subject, 1,1); // epsilon_star matrix version
    
    for(int dii = 1; dii <= num_subject; dii++)
        epsilon_star_matrix[dii][1] = epsilon_star[dii];
    
    double** temp_c = dmatrix(1,num_subject, 1, num_subject);//H * sq_omega_tilda
    matrixmultiply(H, num_subject, num_subject, sq_omega_tilda, num_subject, num_subject,temp_c);
    
    double** temp2_c = dmatrix(1, num_subject, 1,1);//temp_c * epsilon_star_matrix
    matrixmultiply(temp_c, num_subject, num_subject, epsilon_star_matrix, num_subject, 1, temp2_c);
    
    double** temp3_c = dmatrix(1, num_subject, 1,1); //design_matrix * beta_tilda_0
    matrixmultiply(design_matrix, num_subject, num_covariate + 1, beta_tilda_0, num_covariate + 1, 1, temp3_c);
    
    for(int dii = 1; dii <= num_subject ; dii++)
        epsilon_hat_star[dii][1] = Y_star[dii][1] - temp3_c[dii][1]- temp2_c[dii][1];
       
    free_dmatrix(temp_c, 1, num_subject, 1, num_subject);
    free_dmatrix(temp2_c, 1, num_subject, 1, 1);
    free_dmatrix(temp3_c, 1, num_subject, 1, 1);
    
    
    
    ///////////// epsilon_tilda_star estimation //////////  
    
    double** epsilon_tilda_star = dmatrix(1, num_subject, 1, 1);
    
    double** temp_d = dmatrix(1, num_subject, 1, 1); //sq_omega_tilda * epsilon_star_matrix
    matrixmultiply(sq_omega_tilda, num_subject, num_subject, epsilon_star_matrix, num_subject, 1, temp_d);
    
    double** temp2_d = dmatrix(1, wald_df, 1, 1); // Rx_trans * temp_d
    matrixmultiply(Rx_trans, wald_df, num_subject, temp_d, num_subject, 1, temp2_d);
    
    double** temp3_d = dmatrix(1, wald_df, 1, 1); // Ra_inv * temp2_d
    matrixmultiply(Ra_inv, wald_df, wald_df, temp2_d, wald_df, 1, temp3_d);
    
    double** temp4_d = dmatrix(1, num_subject, 1, 1); // Rx * temp3_d 
    matrixmultiply(Rx,num_subject, wald_df, temp3_d, wald_df, 1, temp4_d);
    
    for(int dii = 1; dii <= num_subject; dii++)
        epsilon_tilda_star[dii][1] = epsilon_hat_star[dii][1] + temp4_d[dii][1];
    
    
    free_dmatrix(temp_d,1, num_subject, 1, 1);
    free_dmatrix(temp2_d, 1, wald_df, 1, 1);
    free_dmatrix(temp3_d, 1, wald_df, 1, 1);
    free_dmatrix(temp4_d, 1, num_subject, 1, 1);
    
    
    ///////////// omega_tilda_star estimation //////////
    
    double** omega_tilda_star = dmatrix(1, num_subject, 1, num_subject);
  
    
    for(int dii = 1; dii <=num_subject; dii++)
        for(int djj = 1; djj <= num_subject; djj++)
        {
            if(dii == djj)
                omega_tilda_star[dii][djj] = epsilon_tilda_star[dii][1] * epsilon_tilda_star[dii][1] / ( (1-H[dii][dii]) * (1-H[dii][dii]));
                            
            if(dii != djj)
                omega_tilda_star[dii][djj] = 0;
               
        }
    
    
    
    ///////////// sigma_omega_tilda_star estimation //////////
    
    double** sigma_omega_tilda_star = dmatrix(1, wald_df, 1, wald_df);
    
    double** temp_e = dmatrix(1, num_subject, 1, wald_df); // omega_tilda_star * Rx
    matrixmultiply(omega_tilda_star, num_subject, num_subject, Rx, num_subject, wald_df, temp_e);
    
    matrixmultiply(Rx_trans, wald_df, num_subject, temp_e, num_subject, wald_df, sigma_omega_tilda_star);
    
    free_dmatrix(temp_e,1, num_subject, 1, wald_df);
    
    
    double** sigma_omega_tilda_star_inv = dmatrix(1, wald_df, 1, wald_df);//sigma_omega_tilda_star ^(-1)
    
    for(int dii = 1; dii <= wald_df ; dii++)
        for(int djj = 1; djj <= wald_df ; djj++)
            sigma_omega_tilda_star_inv[dii][djj] = sigma_omega_tilda_star[dii][djj];
    
    invv(sigma_omega_tilda_star_inv,wald_df);
    
    
    //////////////////// testing statistics under null estimation ///////////////////
    
    
    double** temp_f = dmatrix(1, num_subject, 1, 1); //sq_omega_tilda * epsilon_star_matrix
    matrixmultiply(sq_omega_tilda, num_subject, num_subject, epsilon_star_matrix, num_subject, 1, temp_f);
    
    double** temp2_f = dmatrix(1,wald_df,1,1); //Rx_trans * temp_f
    matrixmultiply(Rx_trans, wald_df, num_subject, temp_f, num_subject, 1,temp2_f);
    
    double** temp2_f_trans = dmatrix(1,1,1,wald_df);//temp2_f transpose
    matrixtranspose(temp2_f,wald_df,1,temp2_f_trans);
    
    double** temp3_f = dmatrix(1, wald_df, 1, 1); // sigma_omega_tilda_star_inv * temp2_f
    matrixmultiply(sigma_omega_tilda_star_inv, wald_df, wald_df, temp2_f, wald_df, 1, temp3_f);
    
    
    double** testing_stat = dmatrix(1,1,1,1); // testing stat under H0
    
    matrixmultiply(temp2_f_trans, 1, wald_df, temp3_f, wald_df, 1, testing_stat);
    
    
    free_dmatrix(temp_f,1, num_subject, 1, 1);
    free_dmatrix(temp2_f, 1, wald_df, 1, 1);
    free_dmatrix(temp2_f_trans,1,1,1,wald_df);
    free_dmatrix(temp3_f,1, wald_df, 1, 1);
    
    
    double t = 0;
    t = testing_stat[1][1];
    
        
    
    
    
    
    ////////////////// free memory ///////////////
        
        free_dmatrix(beta_hat,1,num_covariate+1,1,1);
        free_dmatrix(beta_tilda_0,1,num_covariate+1,1,1);
        free_dmatrix(t_design_matrix,1, num_covariate+1, 1, num_subject);
        free_dmatrix(tx0y, 1, num_covariate+1, 1, 1);
        free_dmatrix(design_matrix,1,num_subject,1,num_covariate+1);
        free_dmatrix(tx0x, 1, num_covariate+1, 1, num_covariate+1);
        free_dmatrix(tx0x_inv, 1, num_covariate+1, 1, num_covariate+1);
    
        free_dmatrix(R,1, wald_df, 1, num_covariate + 1);
        free_dmatrix(R_trans, 1, num_covariate+1, 1, wald_df);
    
        free_dmatrix(A, 1,num_covariate + 1, 1,wald_df);
        free_dmatrix(Rx, 1, num_subject, 1, wald_df);
        free_dmatrix(Rx_trans,1, wald_df, 1, num_subject);
        free_dmatrix(Ra, 1, wald_df, 1, wald_df);
        free_dmatrix(Ra_inv, 1, wald_df, 1, wald_df);
        free_dmatrix(H, 1, num_subject, 1, num_subject);
    
        free_dmatrix(R_beta_hat,1, wald_df, 1, 1);
    
        free_dmatrix(epsilon_tilda,1, num_subject, 1, 1);
        free_dmatrix(epsilon_hat, 1, num_subject, 1, 1);
        free_dmatrix(omega_tilda, 1, num_subject, 1 , num_subject);
        free_dmatrix(omega_tilda_star, 1, num_subject, 1 , num_subject);
        free_dmatrix(sq_omega_tilda, 1, num_subject, 1 , num_subject);
        free_dmatrix(Y_star,1, num_subject, 1, 1);
        free_dmatrix(epsilon_hat_star, 1, num_subject, 1, 1);
        free_dmatrix(epsilon_star_matrix, 1, num_subject, 1, 1);
        free_dmatrix(epsilon_tilda_star, 1, num_subject, 1, 1);
    
        free_dmatrix(sigma_omega_tilda_star, 1, wald_df, 1, wald_df);
        free_dmatrix(sigma_omega_tilda_star_inv, 1, wald_df, 1, wald_df);
    
        free_dmatrix(testing_stat, 1,1,1,1);



    
    
     
    return t;    
        
}









/******************* screening **************************/

/* qsort comparator: ascending order of doubles. NaN is ordered after every number so
   the ordering stays consistent even when a p-value could not be computed. */
static int screening_compare_pvalue(const void* lhs, const void* rhs)
{
    const double a = *static_cast<const double*>(lhs);
    const double b = *static_cast<const double*>(rhs);

    const bool a_nan = (a != a);
    const bool b_nan = (b != b);
    if(a_nan || b_nan)
        return int(a_nan) - int(b_nan);

    return int(a > b) - int(a < b);
}

/*
 * Screen the responses with a per-response Wald test and flag the most significant ones.
 *
 * For every response column an ordinary least-squares fit on [1, covariate] is computed,
 * the Wald statistic (C'b)' (C' cov(b) C)^(-1) (C'b) is formed, and its p-value is taken
 * from the q output of cdfchi with wald_df degrees of freedom (presumably the upper-tail
 * chi-square probability -- confirm against the cdfchi documentation).
 *
 * With m = min(num_subject, num_response), the k = ceil(m / log(m)) smallest p-values
 * define a threshold (the k-th smallest p-value); every response whose p-value is
 * <= that threshold is kept. Ties at the threshold are all kept.
 *
 * All arrays use 1-based indexing.
 *
 *   response        : num_subject x num_response matrix, one response per column
 *   covariate       : num_subject x num_covariate matrix (intercept is added here)
 *   response_pvalue : OUTPUT, entries 1..num_response; on return holds 1 for a kept
 *                     response and 0 otherwise (the raw p-values are not returned)
 *   num_response    : number of responses; nothing is done when it is < 1
 *   num_subject     : number of subjects (rows)
 *   num_covariate   : number of covariates, excluding the intercept
 *   wald_df         : number of rows of waldC_trans (degrees of freedom of the test)
 *   waldC_trans     : wald_df x (num_covariate + 1) contrast matrix C'
 */
void screening(double** response, double** covariate,double* response_pvalue,int num_response,int num_subject, int num_covariate, int wald_df, double** waldC_trans)
{
    if(num_response < 1)
        return; // nothing to screen; also avoids indexing an empty p-value vector

    const int num_coef = num_covariate + 1; // intercept + covariates


    ////////// design matrix set up /////////////
    double** design_matrix = dmatrix(1, num_subject, 1, num_coef);

    for(int dii = 1; dii <= num_subject; dii++)
    {
        design_matrix[dii][1] = 1; //intercept
        for(int djj = 2; djj <= num_coef; djj++)
            design_matrix[dii][djj] = covariate[dii][djj-1]; //other covariate
    }


    ////////// quantities shared by all responses /////////////
    double** waldC = dmatrix(1, num_coef, 1, wald_df);
    matrixtranspose(waldC_trans, wald_df, num_coef, waldC); // C

    double** t_design_matrix = dmatrix(1, num_coef, 1, num_subject);
    matrixtranspose(design_matrix, num_subject, num_coef, t_design_matrix); // X'

    double** tx0x = dmatrix(1, num_coef, 1, num_coef);
    matrixmultiply(t_design_matrix, num_coef, num_subject, design_matrix, num_subject, num_coef, tx0x); // X'X
    invv(tx0x, num_coef); // tx0x now holds (X'X)^(-1)

    const int residual_df = num_subject - num_covariate - 1; // residual degrees of freedom


    //////////// testing for each response /////////////
    double* temp_index = dvector(1, num_response); // copy of the p-values, sorted further below

    for(int drr = 1; drr <= num_response; drr++)
    {
        // working response: column drr of the response matrix
        double** y = dmatrix(1, num_subject, 1, 1);
        for(int dkk = 1; dkk <= num_subject; dkk++)
            y[dkk][1] = response[dkk][drr];

        // beta = (X'X)^(-1) X'y
        double** tx0y = dmatrix(1, num_coef, 1, 1);
        matrixmultiply(t_design_matrix, num_coef, num_subject, y, num_subject, 1, tx0y);

        double** beta = dmatrix(1, num_coef, 1, 1);
        matrixmultiply(tx0x, num_coef, num_coef, tx0y, num_coef, 1, beta);

        // residual = y - X * beta
        double** xbeta = dmatrix(1, num_subject, 1, 1);
        matrixmultiply(design_matrix, num_subject, num_coef, beta, num_coef, 1, xbeta);

        double** residual = dmatrix(1, num_subject, 1, 1);
        for(int dii = 1; dii <= num_subject; dii++)
            residual[dii][1] = y[dii][1] - xbeta[dii][1];

        // sigma^2 = residual sum of squares / residual degrees of freedom
        double sum = 0;
        for(int dii = 1; dii <= num_subject; dii++)
            sum = sum + residual[dii][1] * residual[dii][1];
        const double sigma_sq = sum / residual_df;

        // covariance(beta) = sigma^2 * (X'X)^(-1)
        double** covariance_beta = dmatrix(1, num_coef, 1, num_coef);
        for(int dii = 1; dii <= num_coef; dii++)
            for(int djj = 1; djj <= num_coef; djj++)
                covariance_beta[dii][djj] = sigma_sq * tx0x[dii][djj];

        // ( C' * covariance(beta) * C )^(-1)
        double** temp1 = dmatrix(1, wald_df, 1, num_coef); // C' * covariance(beta)
        matrixmultiply(waldC_trans, wald_df, num_coef, covariance_beta, num_coef, num_coef, temp1);

        double** covariance_wald_beta = dmatrix(1, wald_df, 1, wald_df);
        matrixmultiply(temp1, wald_df, num_coef, waldC, num_coef, wald_df, covariance_wald_beta);
        invv(covariance_wald_beta, wald_df); // inverted in place

        // Wald statistic (C'beta)' * ( C' cov(beta) C )^(-1) * (C'beta)
        double** Cbeta = dmatrix(1, wald_df, 1, 1); // C' * beta
        matrixmultiply(waldC_trans, wald_df, num_coef, beta, num_coef, 1, Cbeta);

        double** Cbeta_trans = dmatrix(1, 1, 1, wald_df); // (C' * beta)'
        matrixtranspose(Cbeta, wald_df, 1, Cbeta_trans);

        double** temp2 = dmatrix(1, 1, 1, wald_df); // (C'beta)' * ( C' cov(beta) C )^(-1)
        matrixmultiply(Cbeta_trans, 1, wald_df, covariance_wald_beta, wald_df, wald_df, temp2);

        double** chisq = dmatrix(1, 1, 1, 1); // final wald test stat
        matrixmultiply(temp2, 1, wald_df, Cbeta, wald_df, 1, chisq);


        /////// retrieve p-value for wald test stat //////
        int which = 1;        // mode 1 of cdfchi: p and q are computed from x and df
        double p = 0;
        double q = 1;         // default if cdfchi does not write q: treated as not significant
        double x = chisq[1][1];
        double df = double(wald_df);
        int status = 0;
        double bound = 0;

        cdfchi(&which, &p, &q, &x, &df, &status, &bound); // chisq distribution

        response_pvalue[drr] = q;
        temp_index[drr] = q;


        ////////////// free memory //////////////
        free_dmatrix(y, 1, num_subject, 1, 1);
        free_dmatrix(beta, 1, num_coef, 1, 1);
        free_dmatrix(tx0y, 1, num_coef, 1, 1);
        free_dmatrix(Cbeta, 1, wald_df, 1, 1);
        free_dmatrix(residual, 1, num_subject, 1, 1);
        free_dmatrix(xbeta, 1, num_subject, 1, 1);
        free_dmatrix(covariance_beta, 1, num_coef, 1, num_coef);
        free_dmatrix(covariance_wald_beta, 1, wald_df, 1, wald_df);
        free_dmatrix(temp1, 1, wald_df, 1, num_coef);
        free_dmatrix(Cbeta_trans, 1, 1, 1, wald_df);
        free_dmatrix(temp2, 1, 1, 1, wald_df);
        free_dmatrix(chisq, 1, 1, 1, 1);

    } // loop for each response


    ///////// number of responses to keep: ceil(m / log(m)), m = min(num_subject, num_response) //////
    const int num_effective = (num_subject < num_response) ? num_subject : num_response;

    int num_effect_response_int = 0;
    if(num_effective >= 2) // log(1) == 0 and log(m) is undefined for m <= 0
        num_effect_response_int = int(std::ceil(double(num_effective) / std::log(double(num_effective))));

    // keep the rank a valid index into temp_index[1..num_response]
    // (e.g. m == 2 yields 3, and m < 2 yields 0)
    if(num_effect_response_int < 1)
        num_effect_response_int = 1;
    if(num_effect_response_int > num_response)
        num_effect_response_int = num_response;


    ///////// sort pvalue in temp_index (ascending) //////
    std::qsort(temp_index + 1, size_t(num_response), sizeof(double), screening_compare_pvalue);

    const double pvalue_threshold = temp_index[num_effect_response_int];


    //// assign indicator for valid response ////////
    for(int dii = 1; dii <= num_response; dii++)
    {
        if(response_pvalue[dii] <= pvalue_threshold)
            response_pvalue[dii] = 1;
        else
            response_pvalue[dii] = 0;
    }


    free_dmatrix(design_matrix, 1, num_subject, 1, num_coef);
    free_dmatrix(waldC, 1, num_coef, 1, wald_df);
    free_dmatrix(tx0x, 1, num_coef, 1, num_coef);
    free_dmatrix(t_design_matrix, 1, num_coef, 1, num_subject);

    free_dvector(temp_index, 1, num_response);
}



/******************** wild bootstrap sample generating *****************/
/*
 * Generate one wild-bootstrap replicate of every response.
 *
 * With X = [1, covariate], R = waldC_trans and Y = all_response, the function computes
 *
 *   beta_hat     = (X'X)^(-1) X'Y                       unrestricted least squares
 *   A            = (X'X)^(-1) R'
 *   beta_tilda_0 = beta_hat - A (R A)^(-1) R beta_hat   (so that R * beta_tilda_0 = 0)
 *   wildb_response[i][j] = (X beta_tilda_0)[i][j] + (Y - X beta_hat)[i][j] * epsilon_star[i]
 *
 * i.e. the unrestricted residuals of subject i are multiplied by that subject's
 * multiplier and added to the fit under the restriction R * beta = 0.
 *
 * All arrays use 1-based indexing.
 *
 *   epsilon_star   : multipliers, entries 1..num_subject (one per subject, shared by
 *                    all responses)
 *   all_response   : num_subject x num_response matrix of observed responses
 *   covariate      : num_subject x num_covariate matrix (intercept is added here)
 *   num_subject    : number of subjects (rows)
 *   num_covariate  : number of covariates, excluding the intercept
 *   num_response   : number of responses (columns)
 *   wildb_response : OUTPUT, num_subject x num_response, allocated by the caller
 *   waldC_trans    : wald_df x (num_covariate + 1) restriction matrix R
 *   wald_df        : number of rows of waldC_trans
 */
void wildb_sample_generating(double* epsilon_star, double** all_response, double** covariate, int num_subject, int num_covariate, int num_response, double** wildb_response, double** waldC_trans, int wald_df)
{
    const int num_coef = num_covariate + 1; // intercept + covariates


    ////////// design matrix set up /////////////
    double** design_matrix = dmatrix(1, num_subject, 1, num_coef);

    for(int dii = 1; dii <= num_subject; dii++)
    {
        design_matrix[dii][1] = 1; //intercept
        for(int djj = 2; djj <= num_coef; djj++)
            design_matrix[dii][djj] = covariate[dii][djj-1]; //other covariate
    }

    double** t_design_matrix = dmatrix(1, num_coef, 1, num_subject);
    matrixtranspose(design_matrix, num_subject, num_coef, t_design_matrix); // X'


    //////////// beta_hat estimation /////////////
    double** tx0x_inv = dmatrix(1, num_coef, 1, num_coef);
    matrixmultiply(t_design_matrix, num_coef, num_subject, design_matrix, num_subject, num_coef, tx0x_inv); // X'X
    invv(tx0x_inv, num_coef); // inverted in place: (X'X)^(-1)

    double** tx0y = dmatrix(1, num_coef, 1, num_response); // X'Y
    matrixmultiply(t_design_matrix, num_coef, num_subject, all_response, num_subject, num_response, tx0y);

    double** beta_hat = dmatrix(1, num_coef, 1, num_response);
    matrixmultiply(tx0x_inv, num_coef, num_coef, tx0y, num_coef, num_response, beta_hat);


    //////////// perturbed residuals: (Y - X * beta_hat) scaled row-wise by epsilon_star /////////////
    double** xbeta = dmatrix(1, num_subject, 1, num_response);
    matrixmultiply(design_matrix, num_subject, num_coef, beta_hat, num_coef, num_response, xbeta);

    double** epsilon_wildb = dmatrix(1, num_subject, 1, num_response);
    for(int dii = 1; dii <= num_subject; dii++)
        for(int djj = 1; djj <= num_response; djj++)
            epsilon_wildb[dii][djj] = (all_response[dii][djj] - xbeta[dii][djj]) * epsilon_star[dii];

    free_dmatrix(xbeta, 1, num_subject, 1, num_response);


    ///////////// restriction matrices //////////
    // waldC_trans is used directly as R; only its transpose has to be built.
    double** R_trans = dmatrix(1, num_coef, 1, wald_df);
    for(int dii = 1; dii <= wald_df; dii++)
        for(int djj = 1; djj <= num_coef; djj++)
            R_trans[djj][dii] = waldC_trans[dii][djj];

    double** A = dmatrix(1, num_coef, 1, wald_df); // tx0x_inv * R_trans
    matrixmultiply(tx0x_inv, num_coef, num_coef, R_trans, num_coef, wald_df, A);

    double** Ra_inv = dmatrix(1, wald_df, 1, wald_df); // (R * A)^(-1)
    matrixmultiply(waldC_trans, wald_df, num_coef, A, num_coef, wald_df, Ra_inv);
    invv(Ra_inv, wald_df); // inverted in place

    double** R_beta_hat = dmatrix(1, wald_df, 1, num_response); // R * beta_hat
    matrixmultiply(waldC_trans, wald_df, num_coef, beta_hat, num_coef, num_response, R_beta_hat);


    ///////////// beta_tilda_0 estimation //////////
    double** temp_a = dmatrix(1, num_coef, 1, wald_df); // A * Ra_inv
    matrixmultiply(A, num_coef, wald_df, Ra_inv, wald_df, wald_df, temp_a);

    double** temp2_a = dmatrix(1, num_coef, 1, num_response); // temp_a * R_beta_hat
    matrixmultiply(temp_a, num_coef, wald_df, R_beta_hat, wald_df, num_response, temp2_a);

    double** beta_tilda_0 = dmatrix(1, num_coef, 1, num_response);
    for(int dii = 1; dii <= num_coef; dii++)
        for(int djj = 1; djj <= num_response; djj++)
            beta_tilda_0[dii][djj] = beta_hat[dii][djj] - temp2_a[dii][djj];

    free_dmatrix(temp_a, 1, num_coef, 1, wald_df);
    free_dmatrix(temp2_a, 1, num_coef, 1, num_response);


    ///////////// wild bootstrap sample generation //////////
    double** xbeta0 = dmatrix(1, num_subject, 1, num_response); // X * beta_tilda_0
    matrixmultiply(design_matrix, num_subject, num_coef, beta_tilda_0, num_coef, num_response, xbeta0);

    for(int dii = 1; dii <= num_subject; dii++)
        for(int djj = 1; djj <= num_response; djj++)
            wildb_response[dii][djj] = xbeta0[dii][djj] + epsilon_wildb[dii][djj];


    ////////////////// free memory ///////////////
    free_dmatrix(xbeta0, 1, num_subject, 1, num_response);
    free_dmatrix(epsilon_wildb, 1, num_subject, 1, num_response);

    free_dmatrix(beta_tilda_0, 1, num_coef, 1, num_response);
    free_dmatrix(R_beta_hat, 1, wald_df, 1, num_response);
    free_dmatrix(Ra_inv, 1, wald_df, 1, wald_df);
    free_dmatrix(A, 1, num_coef, 1, wald_df);
    free_dmatrix(R_trans, 1, num_coef, 1, wald_df);

    free_dmatrix(beta_hat, 1, num_coef, 1, num_response);
    free_dmatrix(tx0y, 1, num_coef, 1, num_response);
    free_dmatrix(tx0x_inv, 1, num_coef, 1, num_coef);
    free_dmatrix(t_design_matrix, 1, num_coef, 1, num_subject);
    free_dmatrix(design_matrix, 1, num_subject, 1, num_coef);
}










/************************ sparse PCA to get sparse loading from Zou and Hastie 2001 **********************/
void sparse_pca(float* spca_conv,double** P,int num_effect_response_int,double* eigenvector)
{

    
    double** a = dmatrix(1, num_effect_response_int, 1, num_effect_response_int); // duplicate of P to get initial estimation of alpha in Hastie's paper
    double* eigenvalue = dvector(1,num_effect_response_int); // eigenvalue in svd decomposition
    double** v = dmatrix(1, num_effect_response_int, 1, num_effect_response_int); // the V matrix in svd decomposition
    
    for(int dii = 1; dii <= num_effect_response_int; dii++)
        for(int djj = 1; djj <= num_effect_response_int; djj++)
            a[dii][djj] = P[dii][djj];
    
    
    svdcmp(a, num_effect_response_int, num_effect_response_int, eigenvalue, v); 
        
    double** alpha = dmatrix(1, num_effect_response_int, 1, 1); // first eigenvector of P
        
    for(int dii = 1; dii <= num_effect_response_int; dii++)
        alpha[dii][1] = a[dii][1];
    
    double** alpha_trans = dmatrix(1, 1, 1, num_effect_response_int);
    matrixtranspose(alpha, num_effect_response_int, 1, alpha_trans);

    
    
    
    
    
    
    //// calculate lambda_max  = max(alpha_trans*P) ////    
    double** lambda = dmatrix(1, 1, 1, num_effect_response_int);
    matrixmultiply(alpha_trans, 1, num_effect_response_int, P, num_effect_response_int, num_effect_response_int, lambda);
    
    double lambda_max;
    for(int dii = 1; dii <= num_effect_response_int; dii++)
        {
        if(dii == 1)
            lambda_max = fabs(lambda[1][dii]);
            
        if(fabs(lambda[1][dii]) > lambda_max)    
            lambda_max = lambda[1][dii];
        }
    
    
        
    
    
    double* lambda_seq = dvector(1, 101);
    lambda_seq[101] = lambda_max;
    lambda_seq[1] = 0;
    for(int dii = 2; dii <= 100; dii++)
        lambda_seq[dii] = lambda_max/100*(dii-1);
     
    
        
    
    ////////// calculate loading from each lambda /////////////
    
    for(int dll = 50; dll <= 50; dll++) // loop for lambda_seq
        {
        
            
            
            double** iteration_alpha = dmatrix(1, num_effect_response_int,1,1);
            for(int dii = 1; dii <= num_effect_response_int; dii++)
                iteration_alpha[dii][1] = alpha[dii][1];
                        
            double** iteration_alpha_trans = dmatrix(1, 1, 1,num_effect_response_int);
            matrixtranspose(iteration_alpha, num_effect_response_int, 1, iteration_alpha_trans);
            
            
            
            
            double** iteration_beta = dmatrix(1, num_effect_response_int, 1, 1);
            double** iteration_beta_trans = dmatrix(1, 1, 1, num_effect_response_int);
            
            for(int dii = 1; dii <= num_effect_response_int; dii++)
                iteration_beta[dii][1] = 0;
            
            double error = 1;
            
                        
            while(error > 0.01)
                {
                
                    cout << "error: " << error << endl;                 
                    
                    double** new_beta = dmatrix(1, num_effect_response_int, 1, 1);
            
                    double** temp1 = dmatrix(1, 1, 1, num_effect_response_int); // iteration_alpha_trans * P
                    matrixmultiply(iteration_alpha_trans, 1, num_effect_response_int, P, num_effect_response_int, num_effect_response_int, temp1);
                    
                                       
                    
                    ///// calculate new_beta /////
                    for(int dii = 1; dii <= num_effect_response_int; dii++)
                        {
                        
                            double sign_temp = 0;
                            
                            if(temp1[1][dii] > 0)
                                sign_temp = 1.0;
                            if(temp1[1][dii] < 0)
                                sign_temp = -1.0;
                            
                            
                            if( fabs(temp1[1][dii]) > lambda_seq[dll] / 2 )
                                new_beta[dii][1] = (fabs(temp1[1][dii]) - lambda_seq[dll] / 2) * sign_temp;
                            
                            if( fabs(temp1[1][dii]) < lambda_seq[dll] / 2 )
                                new_beta[dii][1] = 0;
                            
                          }
                
                   
                    
                    //////// update iteration_alpha //////////
                    
                    double** temp2 = dmatrix(1, num_effect_response_int, 1, 1); // P * new_beta
                    double** tempP = dmatrix(1, num_effect_response_int, 1, num_effect_response_int);
                    
                    
                    for(int dii = 1; dii <= num_effect_response_int; dii++)
                        for(int djj = 1; djj <= num_effect_response_int; djj++)
                            tempP[dii][djj] = P[dii][djj];                            
                            
                            
                    matrixmultiply(tempP, num_effect_response_int, num_effect_response_int, new_beta, num_effect_response_int, 1, temp2);
                    
                    double** tempv = dmatrix(1, 1, 1, 1);
                    double* temp_eigenvalue = dvector(1, 1);
                    double** temp3 = dmatrix(1, num_effect_response_int, 1, 1); // UV^T
                    
                    
                                       
                    svdcmp(temp2, num_effect_response_int, 1, temp_eigenvalue, tempv); 
                   
                    
                    
                    
                    
                    
                    
                    
                    matrixmultiply(tempP, num_effect_response_int, 1, tempv , 1, 1, temp3);
                    
                    
                    for(int dii = 1; dii <= num_effect_response_int; dii++)
                        iteration_alpha[dii][1] = temp3[dii][1];
                    
                    matrixtranspose(iteration_alpha, num_effect_response_int, 1, iteration_alpha_trans);
                    
                                       
                    
                    
                    free_dmatrix(temp2, 1, num_effect_response_int, 1, 1);
                    free_dmatrix(tempP, 1, num_effect_response_int, 1, num_effect_response_int);
                    free_dmatrix(tempv, 1, 1, 1, 1);
                    free_dvector(temp_eigenvalue, 1, 1);
                    free_dmatrix(temp3, 1, num_effect_response_int, 1, 1);
                                       
                    
                    
                    
                    
                    //////// convergence check ////////////
                    
                    
                    error = fabs(new_beta[1][1] - iteration_beta[1][1]);
                    
                    for(int dii = 2; dii <= num_effect_response_int; dii++)
                        {
                        if(fabs(new_beta[dii][1] - iteration_beta[dii][1]) > error)
                            error = fabs(new_beta[dii][1] - iteration_beta[dii][1]);
                        }
                    
                    
                    
                    int collinear_index = 0; // 0 means new_beta is collinear to iteration_beta
                    
                    for(int djj = 1; djj <= num_effect_response_int; djj++)
                        {
                        if(fabs(new_beta[djj][1]) != fabs(iteration_beta[djj][1]))
                            { 
                                collinear_index = 1;
                                break;
                            }
                        }
                    
                    
                    if(collinear_index == 0)
                        error = 0;
                    
                    
                    cout<< "alpha:" << endl;
                    for(int dii = 1; dii <= num_effect_response_int; dii++)
                        cout <<iteration_alpha[dii][1] << " ";
                    cout << endl;
                    
                    
                    cout<< "beta:" << endl;
                    for(int dii = 1; dii <= num_effect_response_int; dii++)
                        cout <<new_beta[dii][1] << " ";
                    cout << endl;
                    
                    
                    
                    int zero_count = 0;
                    for(int dii = 1; dii <= num_effect_response_int; dii++)
                        if(new_beta[dii][1] == 0)
                            zero_count = zero_count + 1;
                    
                    if(zero_count == num_effect_response_int)
                        {
                        free_dmatrix(new_beta, 1, num_effect_response_int, 1, 1);
                        free_dmatrix(temp1, 1, 1, 1, num_effect_response_int);
                        break;
                        }
                    
                    
                    
                    
                                        
                    ///////////// save new_beta to iteration_beta //////////
                    for(int dii = 1; dii <= num_effect_response_int; dii++)
                        iteration_beta[dii][1] = new_beta[dii][1];
                    
                    matrixtranspose(iteration_beta, num_effect_response_int, 1, iteration_beta_trans);
                    
                   
                    
                    
                    
                    free_dmatrix(new_beta, 1, num_effect_response_int, 1, 1);
                    free_dmatrix(temp1, 1, 1, 1, num_effect_response_int);
                    
                                  
                } // loop to calculate iteration_beta
            
            
                       
            ///////// count the non-zero elements of beta; used below to decide whether to stop the lambda loop ///////
            
            
            int non_zero_count = 0;
            
            for(int dii = 1; dii <= num_effect_response_int; dii++)
                if(iteration_beta[dii][1] != 0)
                    non_zero_count = non_zero_count + 1;
            
            double nummin_sparse_response = double(num_effect_response_int) * spca_conv[0] + 1;
            
            
            cout << "nummin_sparse_response: " << nummin_sparse_response << endl;
            cout << "non_zero_count: " << non_zero_count << endl;
            
            
            free_dmatrix(iteration_alpha, 1, num_effect_response_int,1,1);
            free_dmatrix(iteration_alpha_trans, 1,1,1, num_effect_response_int);
            
            
            
            
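            ////// normalize beta to unit Euclidean (L2) norm //////
            // NOTE(review): if every entry of iteration_beta is zero, sum_temp is 0 and the division below is 0/0 -- confirm this cannot happen here.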
            
            double* normalized_beta = dvector(1, num_effect_response_int);
            
            double sum_temp = 0;
            for(int dii = 1; dii <= num_effect_response_int; dii++)
                sum_temp = sum_temp + iteration_beta[dii][1] * iteration_beta[dii][1];
            sum_temp = sqrt(sum_temp);
            
            for(int dii = 1; dii <= num_effect_response_int; dii++)
                normalized_beta[dii] = iteration_beta[dii][1] / sum_temp;
            
            for(int dii = 1; dii <= num_effect_response_int; dii++)
                iteration_beta[dii][1] = normalized_beta[dii];
            
            free_dvector(normalized_beta, 1, num_effect_response_int);
            

            
         
           if( double(non_zero_count) <= nummin_sparse_response  && nummin_sparse_response >= 2)
                {
                    for(int dii = 1; dii <= num_effect_response_int ; dii++)
                    eigenvector[dii] = iteration_beta[dii][1];  
                    free_dmatrix(iteration_beta, 1, num_effect_response_int, 1, 1);
                    free_dmatrix(iteration_beta_trans, 1, 1, 1, num_effect_response_int);

                break;
                }
            
           
            if( double(non_zero_count) == 1 && nummin_sparse_response < 1)
                {
                    for(int dii = 1; dii <= num_effect_response_int ; dii++)
                    eigenvector[dii] = iteration_beta[dii][1];
                    free_dmatrix(iteration_beta, 1, num_effect_response_int, 1, 1);
                    free_dmatrix(iteration_beta_trans, 1, 1, 1, num_effect_response_int);

                break;
                }            
            
                     
            
            
            
            free_dmatrix(iteration_beta, 1, num_effect_response_int, 1, 1);
            free_dmatrix(iteration_beta_trans, 1, 1, 1, num_effect_response_int);
            
            
            
            
        }// loop for lambda_seq
    
    
    
    free_dmatrix(a, 1, num_effect_response_int, 1, num_effect_response_int);
    free_dvector(eigenvalue, 1, num_effect_response_int);
    free_dmatrix(v, 1, num_effect_response_int, 1, num_effect_response_int);
    free_dmatrix(alpha, 1, num_effect_response_int, 1, 1);
    free_dmatrix(alpha_trans, 1, 1, 1, num_effect_response_int);
    free_dmatrix(lambda, 1, 1, 1, num_effect_response_int);
    free_dvector(lambda_seq, 1, 101);
    
    
    
    
    
    
}
