/*
 *  BaggingProcedure.cpp
 *  MultipleTrait_Simulation
 *
 *  Created by Joanne Lin on 1/24/10.
 *  Copyright 2010 __MyCompanyName__. All rights reserved.
 *
 */
# include "ranlib.h"
# include <cstdio>   
# include <cstdlib>  
# include <cmath>    
# include <iostream>
# include <string> 
# include <fstream>
# include "nrutil.h"
# include <ctime>
# include "matrix_tool.h"
# include "stat_tool.h"
# include "BaggingProcedure.h"


void Bagging(double* distribution_tbar, double** all_covariate, double** all_response, 
             double** waldC_trans, int num_response, int num_subject, 
             int num_covariate, int g, double* zobs, int wald_df, double* waldC_trans_basis, char** argv, int projection_dim)
{
    
       
    int dss = 0;
    int drr = 0;
    
    
        
       
    /********************* Bagging Procedure *********************/ 
    
    
    for(int dgg = 1; dgg <= (g+1); dgg++) //dgg index for g (the numbers of loop of wild bootstrap)
    {
        
                     
        
/********************************* dgg = 1 (testing statistics) *********************************/
        
if(dgg ==1) // loop for original test stat
{
           
            /////////// design matrix singularity check and maf control ////////////
            
                        
            bool covariate_nonsigular_index2 = true;
            
            for(int dcc = 1; dcc <= num_covariate && covariate_nonsigular_index2 == true; dcc++)
            {
                
                double** table = dmatrix(1, num_subject, 1, 2); // categorical table for each covariate
                
                for(int dii = 1; dii <=num_subject; dii++)
                    for(int djj = 1; djj <= 2; djj++)
                        table[dii][djj] = -9;
                
                int point = 0;
                 
                for(dss = 1; dss <=num_subject ;dss++) // categorized for each subject's covariate
                {
                    bool categorized = false;
                    
                    for(int dii = 1; (dii <= point + 1) && (categorized == false); dii++)
                    {
                        if(all_covariate[dss][dcc] == table[dii][1])
                        {
                            table[dii][2] = table[dii][2] + 1;
                            categorized = true;
                            break;
                        }
                        
                        if(dii == (point + 1) && categorized == false)
                        {
                            table[point + 1][1] = all_covariate[dss][dcc];
                            table[point + 1][2] = 1;
                            point = point + 1;
                            break;
                        }
                    }// loop for categorize a single element
                }// loop for categorize a single column
                
                
                ///////// maf control: clear covariate_nonsigular_index2 if any single value accounts for >= 95% of subjects ///////////
                
                for(int dii = 1; dii <= point; dii++)
                    if( (table[dii][2]/double(num_subject)) >= 0.95)
                    {covariate_nonsigular_index2 = false;
                     break;
                    }
                                
                free_dmatrix(table, 1, num_subject, 1, 2);
                
                if(covariate_nonsigular_index2 == false)
                    break;
                
                
            }  // loop for all covariate  
            
            
            if(covariate_nonsigular_index2 == false)
                break;
            
           
    
    
            //////////////// screening /////////////////
    
            
            double* response_pvalue = dvector(1, num_response);
            
            screening(all_response, all_covariate, response_pvalue, num_response, num_subject, 
                      num_covariate, wald_df, waldC_trans);
    
            
    
    
            double num_effect_response_double ;
    
            if(num_subject >= num_response)
                num_effect_response_double = double(num_response) / log(double(num_response));
    
            if(num_subject < num_response)
                num_effect_response_double = double(num_subject) / log(double(num_subject));
    
            int num_effect_response_int = 0;
    
    
    
            if(num_subject >= num_response)
            for(int dii = 1; dii<= num_response; dii++) // screening for [q/log(q)] + 1 responses 
                if(num_effect_response_double > dii)
                    if(num_effect_response_double <= (dii + 1))
                        num_effect_response_int = dii + 1;
    
    
            if(num_subject < num_response)
                for(int dii = 1; dii<= num_subject; dii++) // screening for [q/log(q)] + 1 responses 
                    if(num_effect_response_double > dii)
                        if(num_effect_response_double <= (dii + 1))
                            num_effect_response_int = dii + 1;
    
    
            //cout << num_effect_response_int << endl;
    
    
            double** response_effective = dmatrix(1, num_subject, 1, num_effect_response_int);
    
            int col_count = 1;
    
            for(int djj = 1; djj <=num_response; djj++)
                if(response_pvalue[djj] == 1)
                {
                    for(int dii = 1; dii <=num_subject; dii++)
                        response_effective[dii][col_count] = all_response[dii][djj];
                    col_count = col_count + 1;    
                }
            
    
    
                               
            //////////////// estimating covariance matrix VP, VQ and VR/////////////////////
            
            double** VQ = dmatrix(1,num_effect_response_int,1,num_effect_response_int);    
                    //cout << "run to here 1" << endl;              
            double** VR = dmatrix(1,num_effect_response_int,1,num_effect_response_int);   
                    //cout << "run to here 2" << endl;
            double** VP = dmatrix(1,num_effect_response_int,1,num_effect_response_int);
            
            covariance_VQ(response_effective, all_covariate, num_subject, num_effect_response_int, num_covariate, 
                          VQ, waldC_trans, wald_df); 
                       
            covariance_VR(response_effective, all_covariate, num_subject, num_effect_response_int, num_covariate, VR);
            
             
            for(drr = 1; drr <= num_effect_response_int; drr++)
                for(int drr2 = 1; drr2 <= num_effect_response_int; drr2++)
                    VP[drr][drr2] = VR[drr][drr2] + VQ[drr][drr2];
            
            
                       
                   
            
            
            ///////////////// estimating w ///////////////
            
            double** L = dmatrix(1,num_effect_response_int,1,num_effect_response_int);
            
            for(drr = 1; drr <= num_effect_response_int; drr++) // initialize L
                for(int drr2 = drr; drr2 <= num_effect_response_int; drr2++)
                    L[drr][drr2] = 0;
            
            for(drr = 1; drr <= num_effect_response_int; drr++)
                for(int drr2 = drr; drr2 <= num_effect_response_int; drr2++)
                    L[drr][drr2] = VR[drr][drr2];
            
            double* diag_L = dvector(1, num_effect_response_int);  
            
            choldc(L, num_effect_response_int, diag_L);//cholesky decomposition
            //cout << "run to here 3" << endl; 
            
            for(drr = 1; drr <= num_effect_response_int; drr++)// calculate L
                for(int drr2 = drr; drr2 <= num_effect_response_int; drr2++)
                {
                    if(drr == drr2)
                        L[drr][drr2] = diag_L[drr];
                    
                    if(drr != drr2)
                    L[drr][drr2] = 0; 
                    
                }// finalize L
            
            
            /*
            for(int dii = 1; dii <= num_response; dii++)
                for(int djj = 1; djj <= num_response; djj++)
                {
                    cout << VR[dii][djj] << " " ;
                    if(djj == num_response)
                        cout << endl;
                }
             
            cout << endl;
            
            for(int dii = 1; dii <= num_response; dii++)
                for(int djj = 1; djj <= num_response; djj++)
                {
                    cout << L[dii][djj] << " " ;
                    if(djj == num_response)
                        cout << endl;
                }
            
            exit(1);
             
             */
            
            invv(L, num_effect_response_int); // L = L_inverse
           //cout << "run to here 4" << endl;
            
            
            double** t_invL = dmatrix(1, num_effect_response_int, 1, num_effect_response_int);
            matrixtranspose(L, num_effect_response_int, num_effect_response_int, t_invL);

            
            double** P = dmatrix(1, num_effect_response_int, 1, num_effect_response_int);
                // L_inverse * VQ * L_inverse_trans   
            
                        
            
            double** temp1 = dmatrix(1, num_effect_response_int, 1, num_effect_response_int); 
                    // L_inverse * VQ
                        
            matrixmultiply(L, num_effect_response_int, num_effect_response_int, VQ, num_effect_response_int,
                           num_effect_response_int, temp1);
            matrixmultiply(temp1, num_effect_response_int, num_effect_response_int, t_invL,
                           num_effect_response_int, num_effect_response_int, P);
            
            
    
            
            //// perform sparse PCA on P: write P to argv[1], run the R script argv[2], read the loadings from argv[3] /////////

            //
                        
            ofstream output_P;
    
            output_P.open(argv[1]);
    
            for(int dii = 1; dii <= num_effect_response_int; dii++)
                for(int djj = 1; djj <= num_effect_response_int; djj++)
                    {
                        output_P << P[dii][djj] << " ";
                        if(djj == num_effect_response_int)
                            output_P << endl;
                    }
            
            output_P.close();
        
            
    
            //system("cd /Applications");
            
    char *str = (char*)malloc(sizeof(char)*128);
            sprintf(str, "R CMD BATCH %s", argv[2]);    
    
    
            //system("R CMD BATCH run_spca.r");
            system(str);
            
    
            double** eigenvector = dmatrix(1, num_effect_response_int, 1, projection_dim); // the first eigenvector from sparse pca
                
   
            ifstream input_loading;
    
            input_loading.open(argv[3]);
    
        for(int djj = 1; djj <= projection_dim; djj++)
            for(int dii = 1; dii <= num_effect_response_int; dii++)
                input_loading >> eigenvector[dii][djj];
    
            input_loading.close();
    
            //system("rm loading.out");
            //system("rm p.out");
            
    free(str);
    
            double** w_sparse = dmatrix(1, num_effect_response_int, 1, projection_dim); // weight calculation...
    
            matrixmultiply(t_invL, num_effect_response_int, num_effect_response_int, eigenvector, num_effect_response_int, projection_dim, w_sparse);
         
    
       
    
        // assign weight for each original response // 
    
        int weight_count = 1;
    
        double** w = dmatrix(1, num_response, 1, projection_dim);
    
        
            for(int dii = 1; dii <= num_response; dii++)
                {
                if(response_pvalue[dii] == 1)
                    { 
                        for(int djj = 1; djj <= projection_dim; djj++)
                            w[dii][djj] = w_sparse[weight_count][djj];  
                        weight_count = weight_count + 1;   
                    }
        
                if(response_pvalue[dii] == 0)
                    for(int djj = 1; djj <= projection_dim; djj++)
                    	w[dii][djj] = 0;
        
                }
    
    
    
    //for (int dii = 1; dii <= num_response; dii++)
        //cout << w[dii] <<" ";
    //cout << endl;
    //exit(1);
    

    
    
    
    //*****************************************************************//
    //
    // re-estimate VQ/VR and the weights w after screening out the      //
    // responses whose first-component weight w[.][1] is 0              //
    //
    //*****************************************************************//
    
    
    int num_effect_response_int2;
    num_effect_response_int2 = 0;
    
    for(int dii = 1; dii <= num_response; dii++)
        if(w[dii][1] != 0)
            num_effect_response_int2 = num_effect_response_int2 + 1;
    
    
    if(num_effect_response_int2 > 1)
    {
        
    
    double** response_effective2 = dmatrix(1, num_subject, 1, num_effect_response_int2);
    
    int col_count2 = 1;
    
    for(int djj = 1; djj <=num_response; djj++)
        if(w[djj][1] != 0)
        {
            for(int dii = 1; dii <=num_subject; dii++)
                response_effective2[dii][col_count2] = all_response[dii][djj];
            col_count2 = col_count2 + 1;    
        }
    
    
    
    
    //////////////// re-estimating covariance matrices VQ2 and VR2 on the reduced response set (no VP in this pass) /////////////////////
    
    
    double** VQ2 = dmatrix(1,num_effect_response_int2,1,num_effect_response_int2);    
    //cout << "run to here 9" << endl;              
    double** VR2 = dmatrix(1,num_effect_response_int2,1,num_effect_response_int2);   
    //cout << "run to here 10" << endl;
    
    covariance_VQ(response_effective2, all_covariate, num_subject, num_effect_response_int2, num_covariate, 
                  VQ2, waldC_trans, wald_df); 
    
    covariance_VR(response_effective2, all_covariate, num_subject, num_effect_response_int2, num_covariate, VR2);
    
    
    
    
    ///////////////// estimating w ///////////////
    
    double** L2 = dmatrix(1,num_effect_response_int2,1,num_effect_response_int2);
    
    
    for(drr = 1; drr <= num_effect_response_int2; drr++) // initialize L
        for(int drr2 = drr; drr2 <= num_effect_response_int2; drr2++)
            L2[drr][drr2] = 0;
    
    for(drr = 1; drr <= num_effect_response_int2; drr++)
        for(int drr2 = drr; drr2 <= num_effect_response_int2; drr2++)
            L2[drr][drr2] = VR2[drr][drr2];
    
    double* diag_L2 = dvector(1, num_effect_response_int2);  
    
    
    choldc(L2, num_effect_response_int2, diag_L2);//cholesky decomposition
    //cout << "run to here 3" << endl; 
    
    for(drr = 1; drr <= num_effect_response_int2; drr++)// calculate L
        for(int drr2 = drr; drr2 <= num_effect_response_int2; drr2++)
        {
            if(drr == drr2)
                L2[drr][drr2] = diag_L2[drr];
            
            if(drr != drr2)
                L2[drr][drr2] = 0; 
            
        }// finalize L
    
    
    
    invv(L2, num_effect_response_int2); // L = L_inverse
    //cout << "run to here 4" << endl;
    
    
    double** t_invL2 = dmatrix(1, num_effect_response_int2, 1, num_effect_response_int2);
    matrixtranspose(L2, num_effect_response_int2, num_effect_response_int2, t_invL2);
    
    
    double** P2 = dmatrix(1, num_effect_response_int2, 1, num_effect_response_int2);
    // L_inverse * VQ * L_inverse_trans   
    
    
    double** temp1_2 = dmatrix(1, num_effect_response_int2, 1, num_effect_response_int2); 
    // L_inverse * VQ
    
    
    
    
    
    matrixmultiply(L2, num_effect_response_int2, num_effect_response_int2, VQ2, num_effect_response_int2,
                   num_effect_response_int2, temp1_2);
    matrixmultiply(temp1_2, num_effect_response_int2, num_effect_response_int2, t_invL2,
                   num_effect_response_int2, num_effect_response_int2, P2);
    
    
    
    
    //// perform sparse PCA on P2: write P2 to argv[1], run the R script argv[2], read the loadings from argv[3] ///////////
    
    //ofstream output_P;
    
    output_P.open(argv[1]);
    
    for(int dii = 1; dii <= num_effect_response_int2; dii++)
        for(int djj = 1; djj <= num_effect_response_int2; djj++)
        {
            output_P << P2[dii][djj] << " ";
            if(djj == num_effect_response_int2)
                output_P << endl;
        }
    
    output_P.close();
    
    
    
    //system("cd /Applications");
    
    char *str2 = (char*)malloc(sizeof(char)*128);
    sprintf(str2, "R CMD BATCH %s", argv[2]);    
    
    
    //system("R CMD BATCH run_spca.r");
    system(str2);
    
    
    double** eigenvector2 = dmatrix(1, num_effect_response_int2, 1, projection_dim); // the first eigenvector from sparse pca
    
    
    //ifstream input_loading;
    
    input_loading.open(argv[3]);
    
    for(int dii = 1; dii <= num_effect_response_int2; dii++)
    for(int djj = 1; djj <= projection_dim; djj++)
        input_loading >> eigenvector2[dii][djj];
    
    input_loading.close();
    
    //system("rm loading.out");
    //system("rm p.out");
    
    free(str2);    
    
    
    
    
    
    
    double** w_sparse2 = dmatrix(1, num_effect_response_int2, 1, projection_dim); // weight calculation...
    
       
    matrixmultiply(t_invL2, num_effect_response_int2, num_effect_response_int2, eigenvector2, num_effect_response_int2, projection_dim, w_sparse2);                     
    
    
    
    
    // assign weight for each original response // 
    
    int weight_count2 = 1;
    
    
    for(int dii = 1; dii <= num_response; dii++)
    {
        if(w[dii][1] != 0)
        { 
            for(int djj = 1; djj <= projection_dim; djj++)
            w[dii][djj] = w_sparse2[weight_count2][djj];  
            weight_count2 = weight_count2 + 1;   
        }
        
        if(w[dii][1] == 0)
            for(int djj = 1; djj <= projection_dim; djj++)
            w[dii][djj] = 0;
        
    }
    
    
    //for (int dii = 1; dii <= num_response; dii++)
    //    cout << w[dii] <<" ";
    //cout << endl;
    //exit(1);
    
    
    free_dmatrix(response_effective2 , 1, num_subject, 1, num_effect_response_int2);
    free_dmatrix(VQ2, 1, num_effect_response_int2, 1, num_effect_response_int2);   
    free_dmatrix(VR2, 1, num_effect_response_int2, 1, num_effect_response_int2);   
    free_dmatrix(L2, 1, num_effect_response_int2, 1, num_effect_response_int2);
    free_dvector(diag_L2, 1, num_effect_response_int2);
    free_dmatrix(t_invL2, 1, num_effect_response_int2, 1, num_effect_response_int2);
    free_dmatrix(P2, 1, num_effect_response_int2, 1, num_effect_response_int2);
    free_dmatrix(temp1_2, 1, num_effect_response_int2, 1, num_effect_response_int2);
    free_dmatrix(eigenvector2, 1, num_effect_response_int2, 1, projection_dim);
    free_dmatrix(w_sparse2, 1, num_effect_response_int2, 1, projection_dim);    
    
        
    } // if (num_effect_response_int2 > 1)
    
    
    
    
    
        ////// normalize weight //////
    
        double** normalized_w = dmatrix(1, num_response, 1, projection_dim);
    
    for(int djj = 1; djj <= projection_dim; djj++)
    {
        
    
        double sum_temp = 0;
        for(int dii = 1; dii <= num_response; dii++)
            sum_temp = sum_temp + w[dii][djj] * w[dii][djj];
        sum_temp = sqrt(sum_temp);
    
        for(int dii = 1; dii <= num_response; dii++)
            normalized_w[dii][djj] = w[dii][djj] / sum_temp;
    
        for(int dii = 1; dii <= num_response; dii++)
            w[dii][djj] = normalized_w[dii][djj];
    }
    
        free_dmatrix(normalized_w, 1, num_response, 1, projection_dim);
    
    
           
            ////////////// free memory /////////////
            
            
            
            free_dmatrix(VP, 1, num_effect_response_int, 1, num_effect_response_int);
            free_dmatrix(VQ, 1, num_effect_response_int, 1, num_effect_response_int);
            free_dmatrix(VR, 1, num_effect_response_int, 1, num_effect_response_int);
            free_dmatrix(L, 1, num_effect_response_int, 1, num_effect_response_int);
            free_dvector(diag_L,1,num_effect_response_int);
            free_dmatrix(P, 1, num_effect_response_int, 1, num_effect_response_int);
            free_dmatrix(eigenvector,1,num_effect_response_int, 1, projection_dim);
            free_dmatrix(t_invL, 1, num_effect_response_int, 1, num_effect_response_int);
            free_dmatrix(temp1, 1, num_effect_response_int, 1, num_effect_response_int);
    
            free_dvector(response_pvalue, 1, num_response);
            free_dmatrix(response_effective, 1, num_subject, 1, num_effect_response_int);
    
            free_dmatrix(w_sparse, 1, num_effect_response_int, 1, projection_dim);
    
    
                      
            
            /**************************** estimate T ******************************/
                        
            
                       
                   
            
            double** w0y = dmatrix(1,num_subject,1,projection_dim);
    
    //for(int djj = 1; djj <= projection_dim; djj++)
    //    {              
    //      for(dss = 1; dss <= num_subject; dss++)//w0y computing
    //        { double sum = 0;
    //            for(drr = 1; drr<= num_response; drr++)
    //                sum = sum + all_response[dss][drr]*w[drr][djj];
    //            w0y[dss][djj] = sum;  
                
    //        }
    //    }
    
    matrixmultiply(all_response, num_subject, num_response, w, num_response, projection_dim, w0y);
           
            double chi_stat = test_stat(w0y, all_covariate, num_subject, num_covariate, waldC_trans, wald_df, projection_dim);
            //cout << "run to here 6" << endl;
            
            
                       
            free_dmatrix(w0y, 1, num_subject, 1, projection_dim); 
            free_dmatrix(w, 1, num_response, 1 , projection_dim);             
                          
                               
            zobs[1] = chi_stat;
                    
            //cout << zobs[1] << endl;   
                        
            
}//loop for s and dgg = 1 (raw testing stat)

                

        
        
        
        
/********************************* dgg >= 2 ************************************/
        
        /////////// generate wild bootstrap multipliers epsilon_star (ignbin draw, 0 remapped to -1) from the 2nd g-loop iteration onward //////////
        
                
        
if(dgg >= 2) 
        {
           
            double* epsilon_star = dvector(1, num_subject);
            
            for(dss = 1; dss <= num_subject; dss++)
            {    
                /*
                float* setgmn_parm2 = vector(0, 1*(1+3)/2);
                for(int dii = 0; dii <= 1*(1+3)/2; dii++)
                    setgmn_parm2[dii] = 0.0;
                
                float* ccovar_mn2 = vector(0,0);
                for(int dii = 0; dii < 1; dii++) // working covariance matrix
                    ccovar_mn2[dii] = 1.0;
                
                float* mean_mn2 = vector(0,0);
                for(int dii = 0; dii < 1; dii++)
                    mean_mn2[dii] = 0.0;    
                
                long p = 1;
                
                setgmn(mean_mn2, ccovar_mn2, p, setgmn_parm2);
                
                float* work = vector(0, 0);
                float* temp = vector(0, 0);
                
                genmn(setgmn_parm2,work,temp);
                
                epsilon_star[dss]= work[0]; //epsilon_star generating
                
                free_vector(work, 0, 0);
                free_vector(temp, 0, 0);
                free_vector(setgmn_parm2, 0 ,1*(1+3)/2);
                free_vector(ccovar_mn2, 0, 0);     
                free_vector(mean_mn2, 0, 0);
                */
                
                //
                 epsilon_star[dss] = double (ignbin(1, 0.5));
                 
                 if(epsilon_star[dss] == 0)
                 epsilon_star[dss] = -1;
                 //
                
                
            }
            
            
                    
            /////////// design matrix singularity check and maf control ////////////
            
            
            bool covariate_nonsigular_index2 = true;
            
            for(int dcc = 1; dcc <= num_covariate && covariate_nonsigular_index2 == true; dcc++)
            {
                
                double** table = dmatrix(1, num_subject, 1, 2); // categorical table for each covariate
                
                for(int dii = 1; dii <=num_subject; dii++)
                    for(int djj = 1; djj <= 2; djj++)
                        table[dii][djj] = -9;
                
                int point = 0;
                
                for(dss = 1; dss <=num_subject ;dss++) // categorized for each subject's covariate
                {
                    bool categorized = false;
                    
                    for(int dii = 1; (dii <= point + 1) && (categorized == false); dii++)
                    {
                        if(all_covariate[dss][dcc] == table[dii][1])
                        {
                            table[dii][2] = table[dii][2] + 1;
                            categorized = true;
                            break;
                        }
                        
                        if(dii == (point + 1) && categorized == false)
                        {
                            table[point + 1][1] = all_covariate[dss][dcc];
                            table[point + 1][2] = 1;
                            point = point + 1;
                            break;
                        }
                    }// loop for categorize a single element
                }// loop for categorize a single column
                
                
                ///////// maf control: clear covariate_nonsigular_index2 if any single value accounts for >= 95% of subjects ///////////
                
                for(int dii = 1; dii <= point; dii++)
                    if( (table[dii][2]/double(num_subject)) >= 0.95)
                    {covariate_nonsigular_index2 = false;
                        break;
                    }
                
                free_dmatrix(table, 1, num_subject, 1, 2);
                
                if(covariate_nonsigular_index2 == false)
                    break;
                
                
            }  // loop for all covariate  
            
            
            if(covariate_nonsigular_index2 == false)
                break;
            
            
            /////////////// wild bootstrap sample generating /////////////////
            
            double** wildb_response = dmatrix(1, num_subject, 1, num_response);
            
            wildb_sample_generating(epsilon_star, all_response, all_covariate, num_subject, num_covariate, num_response, wildb_response, waldC_trans, wald_df);
            
            
            
            
            
            //////////////// screening /////////////////
            
            double* response_pvalue = dvector(1, num_response);
            
            screening(wildb_response, all_covariate, response_pvalue, num_response, num_subject, 
                      num_covariate, wald_df, waldC_trans);
            
            double num_effect_response_double ;
            
            if(num_subject >= num_response)
                num_effect_response_double = double(num_response) / log(double(num_response));
            
            if(num_subject < num_response)
                num_effect_response_double = double(num_subject) / log(double(num_subject));
            
            int num_effect_response_int = 0;
            
            
            
            if(num_subject >= num_response)
                for(int dii = 1; dii<= num_response; dii++) // screening for [q/log(q)] + 1 responses 
                    if(num_effect_response_double > dii)
                        if(num_effect_response_double <= (dii + 1))
                            num_effect_response_int = dii + 1;
            
            
            if(num_subject < num_response)
                for(int dii = 1; dii<= num_subject; dii++) // screening for [q/log(q)] + 1 responses 
                    if(num_effect_response_double > dii)
                        if(num_effect_response_double <= (dii + 1))
                            num_effect_response_int = dii + 1;
            
            
            //cout << num_effect_response_int << endl;
            
            
            double** response_effective = dmatrix(1, num_subject, 1, num_effect_response_int);
            
            int col_count = 1;
            
            for(int djj = 1; djj <=num_response; djj++)
                if(response_pvalue[djj] == 1)
                {
                    for(int dii = 1; dii <=num_subject; dii++)
                        response_effective[dii][col_count] = wildb_response[dii][djj];
                    col_count = col_count + 1;    
                }
            
            
            
            //////////////// estimating covariance matrix VP, VQ and VR/////////////////////
          

            double** VQ = dmatrix(1,num_effect_response_int,1,num_effect_response_int);    
            //cout << "run to here 9" << endl;              
            double** VR = dmatrix(1,num_effect_response_int,1,num_effect_response_int);   
            //cout << "run to here 10" << endl;
            double** VP = dmatrix(1,num_effect_response_int,1,num_effect_response_int);
            
            covariance_VQ(response_effective, all_covariate, num_subject, num_effect_response_int, num_covariate, 
                          VQ, waldC_trans, wald_df); 
            
            covariance_VR(response_effective, all_covariate, num_subject, num_effect_response_int, num_covariate, VR);
            
            
            for(drr = 1; drr <= num_effect_response_int; drr++)
                for(int drr2 = 1; drr2 <= num_effect_response_int; drr2++)
                    VP[drr][drr2] = VR[drr][drr2] + VQ[drr][drr2];
            
            
            
            
            
            
            ///////////////// estimating w ///////////////
            
            double** L = dmatrix(1,num_effect_response_int,1,num_effect_response_int);
            
            for(drr = 1; drr <= num_effect_response_int; drr++) // initialize L
                for(int drr2 = drr; drr2 <= num_effect_response_int; drr2++)
                    L[drr][drr2] = 0;
            
            for(drr = 1; drr <= num_effect_response_int; drr++)
                for(int drr2 = drr; drr2 <= num_effect_response_int; drr2++)
                    L[drr][drr2] = VR[drr][drr2];
            
            double* diag_L = dvector(1, num_effect_response_int);  
            
            choldc(L, num_effect_response_int, diag_L);//cholesky decomposition
            //cout << "run to here 3" << endl; 
            
            for(drr = 1; drr <= num_effect_response_int; drr++)// calculate L
                for(int drr2 = drr; drr2 <= num_effect_response_int; drr2++)
                {
                    if(drr == drr2)
                        L[drr][drr2] = diag_L[drr];
                    
                    if(drr != drr2)
                        L[drr][drr2] = 0; 
                    
                }// finalize L
            
            
            /*
             for(int dii = 1; dii <= num_response; dii++)
             for(int djj = 1; djj <= num_response; djj++)
             {
             cout << VR[dii][djj] << " " ;
             if(djj == num_response)
             cout << endl;
             }
             
             cout << endl;
             
             for(int dii = 1; dii <= num_response; dii++)
             for(int djj = 1; djj <= num_response; djj++)
             {
             cout << L[dii][djj] << " " ;
             if(djj == num_response)
             cout << endl;
             }
             
             exit(1);
             
             */
            
            invv(L, num_effect_response_int); // L = L_inverse
            //cout << "run to here 4" << endl;
            
            
            double** t_invL = dmatrix(1, num_effect_response_int, 1, num_effect_response_int);
            matrixtranspose(L, num_effect_response_int, num_effect_response_int, t_invL);
            
            
            double** P = dmatrix(1, num_effect_response_int, 1, num_effect_response_int);
            // L_inverse * VQ * L_inverse_trans   
            
            
            double** temp1 = dmatrix(1, num_effect_response_int, 1, num_effect_response_int); 
            // L_inverse * VQ
            
            matrixmultiply(L, num_effect_response_int, num_effect_response_int, VQ, num_effect_response_int,
                           num_effect_response_int, temp1);
            matrixmultiply(temp1, num_effect_response_int, num_effect_response_int, t_invL,
                           num_effect_response_int, num_effect_response_int, P);
            
            
            
            //// sparse PCA on P: write P to the file argv[1], run the R batch script argv[2],
            //// then read the loadings back from the file argv[3]
                       
            ofstream output_P;
            
            output_P.open(argv[1]);
            
            for(int dii = 1; dii <= num_effect_response_int; dii++)
                for(int djj = 1; djj <= num_effect_response_int; djj++)
                {
                    output_P << P[dii][djj] << " ";
                    if(djj == num_effect_response_int)
                        output_P << endl;
                }
            
            output_P.close();
            
            
            
            //system("cd /Applications");
            
            char *str = (char*)malloc(sizeof(char)*128);
            sprintf(str, "R CMD BATCH %s", argv[2]);    
            
            
            //cout << str << endl;
            //exit(1);
            
            //system("R CMD BATCH run_spca.r");
            system(str);
            
            double** eigenvector = dmatrix(1, num_effect_response_int, 1, projection_dim); // the first eigenvector from sparse pca
            
            
            ifstream input_loading;
            
            input_loading.open(argv[3]);
            
               for(int dii = 1; dii <= num_effect_response_int; dii++)
                   for(int djj = 1; djj <= projection_dim; djj++)
                       input_loading >> eigenvector[dii][djj];
            
            input_loading.close();
            
            //system("rm loading.out");
            //system("rm p.out");
            
            free(str);
            
            double** w_sparse = dmatrix(1, num_effect_response_int, 1, projection_dim); // weight calculation...
            
            matrixmultiply(t_invL, num_effect_response_int, num_effect_response_int, eigenvector, num_effect_response_int, projection_dim, w_sparse);
            
            
            
            
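            // Expand w_sparse back to one row per original response: a response with
            // response_pvalue == 1 takes the next row of w_sparse, a response with
            // response_pvalue == 0 gets weight 0 in every projection dimension.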
            
            int weight_count = 1;
            
            double** w = dmatrix(1, num_response, 1, projection_dim);
            
            
            for(int dii = 1; dii <= num_response; dii++)
            {
                if(response_pvalue[dii] == 1)
                { 
                    for(int djj = 1; djj <= projection_dim; djj++)
                        w[dii][djj] = w_sparse[weight_count][djj];  
                    weight_count = weight_count + 1;   
                }
                
                if(response_pvalue[dii] == 0)
                    for(int djj = 1; djj <= projection_dim; djj++)
                        w[dii][djj] = 0;
                
            }
            
            
            
            //for (int dii = 1; dii <= num_response; dii++)
            //cout << w[dii] <<" ";
            //cout << endl;
            //exit(1);
            
            
            
            
            
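            //*****************************************************************//
            //
            // Second pass: keep only the responses whose first-component weight
            // w[.][1] is nonzero, re-estimate VQ and VR on that subset, and
            // recompute the weights (done only when more than one response remains)
            //
            //*****************************************************************//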
            
            
            int num_effect_response_int2;
            num_effect_response_int2 = 0;
            
            for(int dii = 1; dii <= num_response; dii++)
                if(w[dii][1] != 0)
                    num_effect_response_int2 = num_effect_response_int2 + 1;
            
            
            if(num_effect_response_int2 > 1)
            {
                
                
                double** response_effective2 = dmatrix(1, num_subject, 1, num_effect_response_int2);
                
                int col_count2 = 1;
                
                for(int djj = 1; djj <=num_response; djj++)
                    if(w[djj][1] != 0)
                    {
                        for(int dii = 1; dii <=num_subject; dii++)
                            response_effective2[dii][col_count2] = wildb_response[dii][djj];
                        col_count2 = col_count2 + 1;    
                    }
                
                
                
                
                //////////////// estimating covariance matrices VQ2 and VR2 (no VP counterpart is computed in this pass) /////////////////////
                
                
                double** VQ2 = dmatrix(1,num_effect_response_int2,1,num_effect_response_int2);    
                //cout << "run to here 9" << endl;              
                double** VR2 = dmatrix(1,num_effect_response_int2,1,num_effect_response_int2);   
                //cout << "run to here 10" << endl;
                
                covariance_VQ(response_effective2, all_covariate, num_subject, num_effect_response_int2, num_covariate, 
                              VQ2, waldC_trans, wald_df); 
                
                covariance_VR(response_effective2, all_covariate, num_subject, num_effect_response_int2, num_covariate, VR2);
                
                
                
                
                ///////////////// estimating w ///////////////
                
                double** L2 = dmatrix(1,num_effect_response_int2,1,num_effect_response_int2);
                
                
                for(drr = 1; drr <= num_effect_response_int2; drr++) // initialize L
                    for(int drr2 = drr; drr2 <= num_effect_response_int2; drr2++)
                        L2[drr][drr2] = 0;
                
                for(drr = 1; drr <= num_effect_response_int2; drr++)
                    for(int drr2 = drr; drr2 <= num_effect_response_int2; drr2++)
                        L2[drr][drr2] = VR2[drr][drr2];
                
                double* diag_L2 = dvector(1, num_effect_response_int2);  
                
                
                choldc(L2, num_effect_response_int2, diag_L2);//cholesky decomposition
                //cout << "run to here 3" << endl; 
                
                for(drr = 1; drr <= num_effect_response_int2; drr++)// calculate L
                    for(int drr2 = drr; drr2 <= num_effect_response_int2; drr2++)
                    {
                        if(drr == drr2)
                            L2[drr][drr2] = diag_L2[drr];
                        
                        if(drr != drr2)
                            L2[drr][drr2] = 0; 
                        
                    }// finalize L
                
                
                
                invv(L2, num_effect_response_int2); // L = L_inverse
                //cout << "run to here 4" << endl;
                
                
                double** t_invL2 = dmatrix(1, num_effect_response_int2, 1, num_effect_response_int2);
                matrixtranspose(L2, num_effect_response_int2, num_effect_response_int2, t_invL2);
                
                
                double** P2 = dmatrix(1, num_effect_response_int2, 1, num_effect_response_int2);
                // L_inverse * VQ * L_inverse_trans   
                
                
                double** temp1_2 = dmatrix(1, num_effect_response_int2, 1, num_effect_response_int2); 
                // L_inverse * VQ
                
                
                
                
                
                matrixmultiply(L2, num_effect_response_int2, num_effect_response_int2, VQ2, num_effect_response_int2,
                               num_effect_response_int2, temp1_2);
                matrixmultiply(temp1_2, num_effect_response_int2, num_effect_response_int2, t_invL2,
                               num_effect_response_int2, num_effect_response_int2, P2);
                
                
                
                
                //// sparse PCA on P2: write P2 to the file argv[1], run the R batch script argv[2],
                //// then read the loadings back from the file argv[3]
                
                //ofstream output_P;
                
                output_P.open(argv[1]);
                
                for(int dii = 1; dii <= num_effect_response_int2; dii++)
                    for(int djj = 1; djj <= num_effect_response_int2; djj++)
                    {
                        output_P << P2[dii][djj] << " ";
                        if(djj == num_effect_response_int2)
                            output_P << endl;
                    }
                
                output_P.close();
                
                
                
                //system("cd /Applications");
                
                char *str2 = (char*)malloc(sizeof(char)*128);
                sprintf(str2, "R CMD BATCH %s", argv[2]);    
                
                
                //system("R CMD BATCH run_spca.r");
                system(str2);
                
                
                double** eigenvector2 = dmatrix(1, num_effect_response_int2, 1, projection_dim); // the first eigenvector from sparse pca
                
                
                //ifstream input_loading;
                
                input_loading.open(argv[3]);
                
                    for(int dii = 1; dii <= num_effect_response_int2; dii++)
                        for(int djj = 1; djj <= projection_dim; djj++)
                            input_loading >> eigenvector2[dii][djj];    
                
                input_loading.close();
                
                //system("rm loading.out");
                //system("rm p.out");
                
                free(str2);    
                
                
                
                
                
                
                double** w_sparse2 = dmatrix(1, num_effect_response_int2, 1, projection_dim); // weight calculation...
                
                
                matrixmultiply(t_invL2, num_effect_response_int2, num_effect_response_int2, eigenvector2, num_effect_response_int2, projection_dim, w_sparse2);                     
                
                
                
                
                // assign weight for each original response // 
                
                int weight_count2 = 1;
                
                
                for(int dii = 1; dii <= num_response; dii++)
                {
                    if(w[dii][1] != 0)
                    { 
                        for(int djj = 1; djj <= projection_dim; djj++)
                            w[dii][djj] = w_sparse2[weight_count2][djj];  
                        weight_count2 = weight_count2 + 1;   
                    }
                    
                    if(w[dii][1] == 0)
                        for(int djj = 1; djj <= projection_dim; djj++)
                            w[dii][djj] = 0;
                    
                }
                
                
                //for (int dii = 1; dii <= num_response; dii++)
                //    cout << w[dii] <<" ";
                //cout << endl;
                //exit(1);
                
                
                free_dmatrix(response_effective2 , 1, num_subject, 1, num_effect_response_int2);
                free_dmatrix(VQ2, 1, num_effect_response_int2, 1, num_effect_response_int2);   
                free_dmatrix(VR2, 1, num_effect_response_int2, 1, num_effect_response_int2);   
                free_dmatrix(L2, 1, num_effect_response_int2, 1, num_effect_response_int2);
                free_dvector(diag_L2, 1, num_effect_response_int2);
                free_dmatrix(t_invL2, 1, num_effect_response_int2, 1, num_effect_response_int2);
                free_dmatrix(P2, 1, num_effect_response_int2, 1, num_effect_response_int2);
                free_dmatrix(temp1_2, 1, num_effect_response_int2, 1, num_effect_response_int2);
                free_dmatrix(eigenvector2, 1, num_effect_response_int2, 1, projection_dim);
                free_dmatrix(w_sparse2, 1, num_effect_response_int2, 1, projection_dim);    
                
                
            } // if (num_effect_response_int2 > 1)
            
            
            
            
            
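            ////// normalize each column of w to unit Euclidean length //////
            // NOTE(review): if a column of w is entirely zero, sum_temp is 0 and the
            // division below is 0/0 -- confirm whether that case can occur.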
            
            double** normalized_w = dmatrix(1, num_response, 1, projection_dim);
            
            for(int djj = 1; djj <= projection_dim; djj++)
            {
                
                
                double sum_temp = 0;
                for(int dii = 1; dii <= num_response; dii++)
                    sum_temp = sum_temp + w[dii][djj] * w[dii][djj];
                sum_temp = sqrt(sum_temp);
                
                for(int dii = 1; dii <= num_response; dii++)
                    normalized_w[dii][djj] = w[dii][djj] / sum_temp;
                
                for(int dii = 1; dii <= num_response; dii++)
                    w[dii][djj] = normalized_w[dii][djj];
            }
            
            free_dmatrix(normalized_w, 1, num_response, 1, projection_dim);
            
            
            
            ////////////// free memory /////////////
            
            
            
            free_dmatrix(VP, 1, num_effect_response_int, 1, num_effect_response_int);
            free_dmatrix(VQ, 1, num_effect_response_int, 1, num_effect_response_int);
            free_dmatrix(VR, 1, num_effect_response_int, 1, num_effect_response_int);
            free_dmatrix(L, 1, num_effect_response_int, 1, num_effect_response_int);
            free_dvector(diag_L,1,num_effect_response_int);
            free_dmatrix(P, 1, num_effect_response_int, 1, num_effect_response_int);
            free_dmatrix(eigenvector,1,num_effect_response_int, 1, projection_dim);
            free_dmatrix(t_invL, 1, num_effect_response_int, 1, num_effect_response_int);
            free_dmatrix(temp1, 1, num_effect_response_int, 1, num_effect_response_int);
            
            free_dvector(response_pvalue, 1, num_response);
            free_dmatrix(response_effective, 1, num_subject, 1, num_effect_response_int);
            
            free_dmatrix(w_sparse, 1, num_effect_response_int, 1, projection_dim);
            
            
            
            
            /**************************** estimate T ******************************/
            
            
            
            
            
            double** w0y = dmatrix(1,num_subject,1,projection_dim);
            
            matrixmultiply(wildb_response, num_subject, num_response, w, num_response, projection_dim, w0y);
            
            double chi_stat = test_stat(w0y, all_covariate, num_subject, num_covariate, waldC_trans, wald_df, projection_dim);
            //cout << "run to here 6" << endl;
            
            
            
            free_dmatrix(w0y, 1, num_subject, 1, projection_dim); 
            free_dmatrix(w, 1, num_response, 1 , projection_dim);             
            
            
                       
            distribution_tbar[dgg - 1] = chi_stat;
                
                        
               //cout << "distribution_tbar[dgg]: " << distribution_tbar[dgg - 1] << endl;
            
            
            free_dvector(epsilon_star, 1, num_subject);   
            free_dmatrix(wildb_response, 1, num_subject, 1, num_response);
            
    
                      
        }// loop for dgg >=2
        
        
        
                         
        //cout << dgg << " iteration is completed (g)" << endl;  
        
    }//loop for g
    
    






}
