#include <cstdio>   
# include <cstdlib> 
# include <cmath>     
# include <iostream>
# include <string>
# include <fstream>
 
# include "nrutil.h" 
# include "functions.h" 
# include "sem.h"



/////////////////
// Simulation
/////////////////

// sim(): sets the "true" parameter values of the simulation study and
// generates one simulated data set from them.
//
// Nonlinear terms actually used below (stored in fs[i][1] and fs[i][2]):
//   f1(ksi1) = sin(ksi1) - ksi1 + 0.5,    f2(ksi2) = exp(ksi2)/2.5 - 3.0
// The Gamma*H term of the structural equation is replaced by f1(ksi1)+f2(ksi2).
//
// All arguments are 1-based arrays allocated by the caller and written here:
//   indm    NP x (Nr+NQ) 0/1 flags set next to the non-fixed entries of Lmdy
//           (presumably "parameter is estimated" -- confirm against the caller)
//   C       Nr x NN, all ones
//   Am      NP x Nr,  trueLmd NP x NQ,  Lmdy = (Am, trueLmd), NP x (Nr+NQ)
//   B       NQ1 x NX, Pi NQ1 x NQ1 (all zero), Phi (2x2 entries), psi[1..NP], psz[1]
//   X       NN x NX,  ksi NN x NQ2,  eta NN x NQ1,  Omg NN x NQ = (eta, ksi)
//   Y       NP x NN
// NOTE(review): the hard-coded indices (rows up to 9, columns up to Nr+3, psz[1],
// B[1][3]) assume NP>=9, Nr>=9, NQ>=3, NX>=3 and NQ1==1, as in the comment below.
// Side effect: writes (Y^T, X) to "SimData.txt" through savesim().
void sim(int**	indm, double**	C, double**	Am, double** trueLmd, 
		 double** B, double**	Phi, double** Pi, double* psi, double* psz, double** X, 
		double** Lmdy, double** ksi, double** eta, double** Omg, double **Y)
{
	//NN = 500; NP = 9; NQ = 3; NQ1=1; NQ2 = 2; Nr = 9; NX = 3; NK = 5;   
	int			i, j, k, ny = Nr+NQ;

	// Pi0 = I - Pi; invPi0 starts as a copy of Pi0 and is inverted in place by invv()
	double**	Pi0 = dmatrix(1, NQ1, 1, NQ1);
	double**	invPi0 = dmatrix(1, NQ1, 1, NQ1);

	// Fixed covariates of the measurement equation: all ones
	for(i=1; i<=Nr; i++){
		for(j=1; j<=NN; j++)
			C[i][j] = 1.0;
	}
	// True covariance of ksi and true residual variances
	Phi[1][1]=1.0; Phi[2][2]=1.0; Phi[1][2]=Phi[2][1]=0.25;
	for(i=1; i<=NP; i++)	psi[i] = 0.36;
	psz[1]=0.36;
	for(i=1; i<=NQ1; i++){
		for(j=1; j<=NQ1; j++){
			Pi[i][j] = 0.0;
			if(i==j)
				Pi0[i][j] = 1-Pi[i][j];
			else 
				Pi0[i][j] = -Pi[i][j];
			invPi0[i][j] = Pi0[i][j];
		}
		for(j=1; j<=NX; j++)
			B[i][j] = 0.36;
	}
	invv(invPi0, NQ1);
	B[1][3] = 0.0; // third covariate has no effect in the true model

	// Clear the measurement-equation parameters before setting the non-zero ones
	for(i=1; i<=NP; i++){
		for(j=1; j<=Nr; j++)
			Am[i][j] = 0.0;
		for(j=1; j<=ny; j++)
			indm[i][j] = 0; 
		for(j=1; j<=NQ; j++)
			trueLmd[i][j] = 0.0;
	}
	Am[4][4]=Am[5][5]=Am[6][6]=Am[7][7]=Am[8][8]=Am[9][9]=0.36;  // Am
	indm[4][4]=indm[5][5]=indm[6][6]=indm[7][7]=indm[8][8]=indm[9][9]=1;  // for Am
	indm[2][1+Nr]=indm[3][1+Nr]=indm[5][2+Nr]=indm[6][2+Nr]=indm[8][3+Nr]=indm[9][3+Nr]=1; // for Lambda in ME
	// Loadings equal to 1.0 are not flagged in indm; the 0.36 loadings are
	trueLmd[1][1]=trueLmd[4][2]=trueLmd[7][3]=1.0;
	trueLmd[2][1]=trueLmd[3][1]=trueLmd[5][2]=trueLmd[6][2]=trueLmd[8][3]=trueLmd[9][3]=0.36;
	
	// Lmdy = (Am, trueLmd): first Nr columns from Am, remaining NQ from trueLmd
	for(i=1; i<=NP; i++){
		for(j=1; j<=ny; j++){
			if(j<=Nr)	Lmdy[i][j] = Am[i][j];
			else	Lmdy[i][j] = trueLmd[i][j-Nr];
		}		
	}


	// Simulate data
	double		tau1=.5, tau2=.4, tau3=.5; 
	double*		muyi = dvector(1, NP);	// allocated and freed but never used
	double*		gyi = dvector(1, ny);
	double*		tempi=dvector(1, NQ1);
	double*		tempi2=zerodvector(NQ1);
	double*		muy = dvector(1, NP);
	double**	fs = dmatrix(1, NN, 1, 3);	// only columns 1 and 2 are filled

	// Simulate wi=(eta_i, ksi_i), ksi_i from Normal(0, Phi), and eta_i from SE
	multinor(NN, NQ2, Phi, ksi);	

	for(i=1; i<=NN; i++){
		// Determine f1, f2 (no f3 term is generated)
		fs[i][1] = sin(1.0*ksi[i][1])-1.0*ksi[i][1]+.5; // *good plot with s[i][2] = exp(ksi[i][2])/2.5-2.5;// but bad coefs
		fs[i][2] = 1.0*(exp(ksi[i][2])/2.5-3.0);

		// Covariates: x1 = 2*rbinom(1, tau1), x2 = -tau3 + gasdev(), x3 = rbinom(1, tau2)
		X[i][1] = 2.0*rbinom(1, tau1);
		X[i][2] = 1.0*(-tau3 + gasdev());
		X[i][3] = 1.0*rbinom(1, tau2);			

		// Simulate eta_i from SE: (I-Pi) eta_i = B x_i + f1 + f2 + noise with variance psz
		for(j=1; j<=NQ1; j++){
			tempi2[j] = 0.0;
			for(k=1; k<=NX; k++)
				tempi2[j] += B[j][k]*X[i][k];
			tempi2[j] += (fs[i][1]+fs[i][2]);
			tempi[j] = sqrt(psz[j])*gasdev()+tempi2[j];
		}
		dmatrixmultiply(invPi0, NQ1, NQ1, tempi, eta[i]);

		// omega_i = (eta_i, ksi_i)
		for(j=1; j<=NQ; j++){
			if(j<=NQ1)	Omg[i][j] = eta[i][j];
			else	Omg[i][j] = ksi[i][j-NQ1];
		}

		// Simulate yi from the measurement equation: y_i = Lmdy * (c_i, omega_i) + noise
		for(j=1; j<=ny; j++){	
			if(j<=Nr)	gyi[j] = C[j][i];
			else	gyi[j] = Omg[i][j-Nr];
		}
		dmatrixmultiply(Lmdy, NP, ny, gyi, muy);
		for(j=1; j<=NP; j++)
			Y[j][i] = gasdev()*sqrt(psi[j])+muy[j];

	} // for i in 1:NN

	// Save the data set with one observation per row: (Y^T, X)
	double** Yt=dmatrix(1, NN, 1, NP);
	dmatrixtranspose(Y, NP, NN, Yt);
	savesim(Yt, X, NN, NP, NX, "SimData.txt");
	free_dmatrix(Yt, 1, NN, 1, NP);

	free_dmatrix(Pi0, 1, NQ1, 1, NQ1);		free_dmatrix(invPi0, 1, NQ1, 1, NQ1);		free_dvector(muyi, 1, NP);
	free_dvector(gyi, 1, ny);		free_dvector(tempi, 1, NQ1);		free_dvector(tempi2, 1, NQ1);
	free_dvector(muy, 1, NP);		free_dmatrix(fs, 1, NN, 1, 3);

} //End of sim()



// allparasim() packs all unknown parameters into one vector (simulation study).
// para is the 1-based output vector, filled in this order:
//   NP entries        : lmdy[i][i], i = 1..NP
//   6 entries         : lmdy[2][Nr+1], lmdy[3][Nr+1], lmdy[5][Nr+2],
//                       lmdy[6][Nr+2], lmdy[8][Nr+3], lmdy[9][Nr+3]
//   NP entries        : psi[1..NP]
//   NST entries       : lmdom[1][1..NST]
//   NQ1 entries       : psz[1..NQ1]
//   3 entries         : Phi[1][1], Phi[1][2], Phi[2][2]
//   Nlassolmd entries : lassolmd[1][1..Nlassolmd]
// Returns the number of entries written to para.
int allparasim(double *para, double **lmdy, double **lmdom, double *psi, double *psz,
			 double **Phi, double **lassolmd)
{
	static const int	loadRow[6] = {2, 3, 5, 6, 8, 9};	// rows of the six off-diagonal loadings
	int		pos = 0;	// index of the last slot of para[] written so far
	int		k;

	for(k=1; k<=NP; k++)
		para[++pos] = lmdy[k][k];

	// two consecutive rows share each of the columns Nr+1, Nr+2, Nr+3
	for(k=0; k<6; k++)
		para[++pos] = lmdy[loadRow[k]][Nr+1+k/2];

	for(k=1; k<=NP; k++)
		para[++pos] = psi[k];
	for(k=1; k<=NST; k++)
		para[++pos] = lmdom[1][k];
	for(k=1; k<=NQ1; k++)
		para[++pos] = psz[k];

	para[++pos] = Phi[1][1];
	para[++pos] = Phi[1][2];
	para[++pos] = Phi[2][2];

	for(k=1; k<=Nlassolmd; k++)
		para[++pos] = lassolmd[1][k];

	return	pos;
} // End of allparasim()



// allparasimstd() packs all unknown parameters into one vector (simulation
// study, standardized version).
// Same layout as allparasim(); the only difference is that the first NQ1+NX
// structural coefficients lmdom[1][i] are divided by sstd[i] before being
// stored (conversion back to the original scale), the remaining ones are
// stored unchanged.
// para is the 1-based output vector, filled in this order:
//   NP entries        : lmdy[i][i], i = 1..NP
//   6 entries         : lmdy[2][Nr+1], lmdy[3][Nr+1], lmdy[5][Nr+2],
//                       lmdy[6][Nr+2], lmdy[8][Nr+3], lmdy[9][Nr+3]
//   NP entries        : psi[1..NP]
//   NST entries       : lmdom[1][1..NST] (first NQ1+NX divided by sstd)
//   NQ1 entries       : psz[1..NQ1]
//   3 entries         : Phi[1][1], Phi[1][2], Phi[2][2]
//   Nlassolmd entries : lassolmd[1][1..Nlassolmd]
// Returns the number of entries written to para.
int allparasimstd(double *para, double **lmdy, double **lmdom, double *psi, double *psz,
			 double **Phi, double **lassolmd, double *sstd)
{
	static const int	loadRow[6] = {2, 3, 5, 6, 8, 9};	// rows of the six off-diagonal loadings
	int		pos = 0;	// index of the last slot of para[] written so far
	int		k;

	for(k=1; k<=NP; k++)
		para[++pos] = lmdy[k][k];

	// two consecutive rows share each of the columns Nr+1, Nr+2, Nr+3
	for(k=0; k<6; k++)
		para[++pos] = lmdy[loadRow[k]][Nr+1+k/2];

	for(k=1; k<=NP; k++)
		para[++pos] = psi[k];

	for(k=1; k<=NST; k++){
		++pos;
		if(k>NQ1+NX)
			para[pos] = lmdom[1][k];
		else
			para[pos] = lmdom[1][k]/sstd[k];	// undo the standardization
	}

	for(k=1; k<=NQ1; k++)
		para[++pos] = psz[k];

	para[++pos] = Phi[1][1];
	para[++pos] = Phi[1][2];
	para[++pos] = Phi[2][2];

	for(k=1; k<=Nlassolmd; k++)
		para[++pos] = lassolmd[1][k];

	return	pos;
} // End of allparasimstd()





// printsim() prints the averaged estimates to stdout and writes them to
// "Estimates.txt".
// avgpara : 1-based vector in the layout produced by allparasim():
//           (lmdy[i][i] i=1..NP, 6 loadings, psi[1..NP], lmdom[1][1..NST],
//            psz[1..NQ1], Phi11, Phi12, Phi22, lassolmd[1..Nlassolmd])
// lmdy    : NP x NY matrix supplying the entries of (A, Lambda) that are not
//           part of avgpara
// psi     : unused; kept so the signature stays unchanged
// Changes from the previous version: the output file name is no longer a
// leaked heap buffer, a failed fopen() is reported instead of dereferencing
// NULL, and the lasso parameters written to the file (and the matching free)
// use Nlassolmd rather than a hard-coded 3.
void printsim(double *avgpara, double **lmdy, double *psi)
{
	int			i, j;
	double*		avgpsi = dvector(1, NP);
	double*		avgpsz = dvector(1, NQ1);
	double*		avglassolmd = dvector(1, Nlassolmd);
	double**	avgPhi = dmatrix(1, NQ2, 1, NQ2);
	double**	avglmdy = dmatrix(1, NP, 1, NY);
	double**	avgAm = dmatrix(1, NP, 1, Nr);
	double**	avglmd = dmatrix(1, NP, 1, NQ);
	double**	avglmdom = dmatrix(1, NQ1, 1, NST);
	int*		indlmdom = zeroivector(NST);
	double**	printlmdom = dmatrix(1, NST, 1, NQ1+1);

	// Start from lmdy, then overwrite the estimated entries with their averages
	for(i=1; i<=NP; i++){
		for(j=1; j<=NY; j++)
			avglmdy[i][j] = lmdy[i][j];
	}
	for(i=1; i<=NP; i++)
		avgpsi[i] = avgpara[NP+6+i];
	for(i=1; i<=NP; i++)
		avglmdy[i][i] = avgpara[i];
	avglmdy[2][Nr+1] = avgpara[NP+1];	avglmdy[3][Nr+1] = avgpara[NP+2];
	avglmdy[5][Nr+2] = avgpara[NP+3];	avglmdy[6][Nr+2] = avgpara[NP+4];
	avglmdy[8][Nr+3] = avgpara[NP+5];	avglmdy[9][Nr+3] = avgpara[NP+6];

	// Split (A, Lambda)
	for(i=1; i<=NP; i++){
		for(j=1; j<=Nr; j++)
			avgAm[i][j] = avglmdy[i][j];
		for(j=1; j<=NQ; j++)
			avglmd[i][j] = avglmdy[i][j+Nr];
	}

	// Structural-equation coefficients and a group label for each of them.
	// NOTE(review): the label layout below assumes NQ1+NX == 4 and
	// NST >= 5+NQ2*(NK-1)+(NK-1)*(NK-1) -- confirm against sem.h.
	for(i=1; i<=NST; i++)
		avglmdom[1][i] = avgpara[NP+6+NP+i];
	indlmdom[1] = 1;
	indlmdom[2] = indlmdom[3] = indlmdom[4] = 2; indlmdom[5] = 3;
	for(j=1; j<=NQ2; j++){
		for(i=1; i<=NK-1; i++){
			indlmdom[5+(j-1)*(NK-1)+i] = j+2;
		}
	}
	for(i=5+NQ2*(NK-1)+1; i<=5+NQ2*(NK-1)+(NK-1)*(NK-1); i++)
		indlmdom[i] = 3+NQ2;
	if(5+NQ2*(NK-1)+(NK-1)*(NK-1)<NST){
		for(i=5+NQ2*(NK-1)+(NK-1)*(NK-1)+1; i<=NST; i++)
			indlmdom[i] = 3+NQ2+1;
	}

	for(i=1; i<=NQ1; i++)
		avgpsz[i] = avgpara[NP+6+NP+NST+i];

	avgPhi[1][1] = avgpara[NP+6+NP+NST+NQ1+1];
	avgPhi[1][2] = avgPhi[2][1] = avgpara[NP+6+NP+NST+NQ1+2]; 
	avgPhi[2][2] = avgpara[NP+6+NP+NST+NQ1+3];

	j = NP+6+NP+NST+NQ1+3;	// offset of the lasso parameters inside avgpara

	for(i=1; i<=Nlassolmd; i++)
		avglassolmd[i] = avgpara[j+i];

	// Table printed for Lmdom: column 1 = group label, columns 2.. = coefficients
	for(i=1; i<=NST; i++){
		printlmdom[i][1] = 1.0*indlmdom[i];
		for(j=1; j<=NQ1; j++)
			printlmdom[i][j+1] = avglmdom[j][i];
	}
	cout << "\n Average estimates of A:"; dmatrixprint(avgAm, NP, Nr);
	cout << "\n Average estimates of Lmd:"; dmatrixprint(avglmd, NP, NQ);
	cout << "\n Average estimates of psi:"; dvectorprint(avgpsi, NP);
	cout << "\n Average estimates of Lmdom:"; dmatrixprint(printlmdom, NST, NQ1+1);
	cout << "\n Average estimates of psz:"; dvectorprint(avgpsz, NQ1);
	cout << "\n Average estimates of Phi:"; dmatrixprint(avgPhi, NQ2, NQ2);
	cout << "\n Average estimates of lasso parameters:"; dvectorprint(avglassolmd, Nlassolmd);

	FILE*   out=fopen("Estimates.txt", "w");
	if(out==NULL){
		// Keep going: the console output above is still valid and the buffers must be freed
		fprintf(stderr, "printsim(): cannot open Estimates.txt for writing\n");
	}else{
		fprintf(out, "Am is: \n");
		for(i=1; i<=NP; i++){
			for(j=1; j<=Nr; j++)
				fprintf(out, "%10.4f \t", avgAm[i][j]);
			fprintf(out, "\n");
		}
		fprintf(out, "\n Lambda is: \n");
		for(i=1; i<=NP; i++){
			for(j=1; j<=NQ; j++)
				fprintf(out, "%10.4f \t", avglmd[i][j]);
			fprintf(out, "\n");
		}
		fprintf(out, "\n Psi is: \n");
		for(i=1; i<=NP; i++)
			fprintf(out, "%10.4f \n", avgpsi[i]);

		fprintf(out, "\n Lambda in SE:\n");
		for(j=1; j<=NST; j++){
			fprintf(out, "%d \t", indlmdom[j]);
			for(i=1; i<=NQ1; i++)
				fprintf(out, "%10.4f \t", avglmdom[i][j]);
			fprintf(out, "\n");
		}
		fprintf(out, "\n Psz is: \n");
		for(i=1; i<=NQ1; i++)
			fprintf(out, "%10.4f \n", avgpsz[i]);
		fprintf(out, "\n Phi is: \n");
		for(i=1; i<=NQ2; i++){
			for(j=1; j<=NQ2; j++)
				fprintf(out, "%10.4f \t", avgPhi[i][j]);
			fprintf(out, "\n");
		}
		fprintf(out, "\n Lasso parameters: \n");
		for(i=1; i<=Nlassolmd; i++)	// was hard-coded to 3; avglassolmd holds Nlassolmd entries
			fprintf(out, "%10.4f \n", avglassolmd[i]);
		fclose(out);
	}

	free_dvector(avgpsi, 1, NP);
	free_dvector(avgpsz, 1, NQ1);
	free_dvector(avglassolmd, 1, Nlassolmd);
	free_dmatrix(avgPhi, 1, NQ2, 1, NQ2);
	free_dmatrix(avglmdy, 1, NP, 1, NY);
	free_dmatrix(avgAm, 1, NP, 1, Nr);
	free_dmatrix(avglmd, 1, NP, 1, NQ);
	free_dmatrix(avglmdom, 1, NQ1, 1, NST);
	free_ivector(indlmdom, 1, NST);
	free_dmatrix(printlmdom, 1, NST, 1, NQ1+1);
	
}



// savesim() writes a data set to a text file, one observation per line:
// the p values YV[i][1..p] followed by the s values XV[i][1..s], each
// formatted with "%10.4f" and followed by a tab.
// YV is n x p and XV is n x s, both 1-based.
// If the file cannot be opened, a message is printed to stderr and nothing is
// written (previously fprintf()/fclose() were called on a NULL stream).
void savesim(double **YV, double **XV, int n, int p, int s, char *filename)
{ 
	int i, j;
	FILE*   out = (filename!=NULL) ? fopen(filename, "w") : NULL;

	if(out==NULL){
		fprintf(stderr, "savesim(): cannot open %s for writing\n",
				(filename!=NULL) ? filename : "(null)");
		return;
	}
	for(i=1; i<=n; i++){
		for(j=1; j<=p; j++)
			fprintf(out, "%10.4f\t", YV[i][j]);
		for(j=1; j<=s; j++)
			fprintf(out, "%10.4f\t", XV[i][j]);
		fprintf(out, "\n");
	}
	fclose(out);
}


//=================================================================================================

// Calculate G(x_i, w_i)=(eta_i, x_i, H(xi_i))
// Notations: omi = (eta_i, xi_i); xi = x_i; 
// NOTE: gxi2() is only for q2=2 & H(xi_i) = (xi_1, xi_2, xi_1*xi_2);
void gxi2(double *gx2, double *xi, double *omi)
{
	int i;
	double*	ksii=dvector(1, NQ2);
	double*	hi;

	for(i=1; i<=NQ1; i++)	gx2[i]=omi[i];
	for(i=1; i<=NX; i++)	gx2[i+NQ1]=xi[i];
	for(i=1; i<=NQ2; i++)	ksii[i]=omi[i+NQ1];
	hi=Hksii(ksii);
	for(i=1; i<=NT; i++)	gx2[i+NQ1+NX]=hi[i];
	free_dvector(ksii, 1, NQ2);
	free_dvector(hi, 1, NT);
} //end gxi2();


// Calculate H(xi_i)
// For Simulation 1, 2, &3, H(xi_i) = (xi_1, xi_2, xi_1*xi_2);
double* Hksii(double *ksii)
{
	int i;
	double* h=dvector(1, NT);
	double** ni=dmatrix(1, 1, 1, NT);
	double** ksii1=dmatrix(1, 1, 1, NQ2);
	for(i=1; i<=NQ2; i++)
		ksii1[1][i] = ksii[i];
	basis(ni, ksii1, Knots, 1, NK);
	for(i=1; i<=NT; i++)
		h[i] = ni[1][i];
	free_dmatrix(ni, 1, 1, 1, NT);	free_dmatrix(ksii1, 1, 1, 1, NQ2);

	return h;
} //end Hksii();

double* Hksii(double *ksii, double **knots)
{
	int i;
	double* h=dvector(1, NT);
	double** ni=dmatrix(1, 1, 1, NT);
	double** ksii1=dmatrix(1, 1, 1, NQ2);

	for(i=1; i<=NQ2; i++)
		ksii1[1][i] = ksii[i];
	basis(ni, ksii1, knots, 1, NK);
	for(i=1; i<=NT; i++)
		h[i] = ni[1][i];
	free_dmatrix(ni, 1, 1, 1, NT);	free_dmatrix(ksii1, 1, 1, 1, NQ2);

	return h;
} //end Hksii();


// Provides starting values of w_i given (B, Gamma, Phi, PSD)
// Output: om = (om1, ..., omn), with omi = (eta_i, xi_i) = w_i
// Input: GMX = (Pi, B, Gamma); 
// Generate xi_i from N(0, PhI)
// Generate eta_i from the structural equation (normal distribution) based on xi_i 
// Pi0 = I-Pi;
void init(double** Pi, double **B,double **Gamma, double **PHI,double *PSD,double **X,double **om)  
{
   int    i, j, k;
   double	**INPH1, **ksii, *xxi, **invPi0;
   double*	omi = dvector(1, NQ);
   double*	hi;
            
   ksii=dmatrix(1,NN,1,NQ2);      xxi=dvector(1,NX);
   INPH1=dmatrix(1,NQ2,1,NQ2);	  invPi0=dmatrix(1, NQ1, 1, NQ1);
   for(i=1;i<=NQ2;i++){
	   for(j=1;j<=NQ2;j++) INPH1[i][j]=PHI[i][j]; 
   }
   multinor(NN, NQ2, INPH1, ksii);

   for(i=1; i<=NQ1; i++)
	   for(j=1; j<=NQ1; j++){
		   if(i==j) invPi0[i][i] = 1-Pi[i][i];
		   else invPi0[i][j] = -Pi[i][j];
	   }
	invv(invPi0, NQ1);

   for(i=1; i<=NN; i++){       
       for(j=1; j<=NQ2; j++) omi[j+NQ1]=ksii[i][j];
	   for(j=1; j<=NX; j++) xxi[j]=X[j][i];
	   hi = Hksii(ksii[i]);

       for(j=1; j<=NQ1; j++){
		   omi[j]=gasdev()*sqrt(PSD[j]);
		   for(k=1; k<=NX; k++){
			   omi[j]+=B[j][k]*xxi[k];
		   }
		   for(k=1; k<=NT; k++) omi[j]+=Gamma[j][k]*hi[k];
	   }
	   free_dvector(hi, 1, NT);
	    
	   for(j=1; j<=NQ1; j++){
		   om[j][i] = 0.0;
		   for(k=1; k<=NQ1; k++){
				om[j][i] += invPi0[j][k]*omi[k];
		   }
	   }
	   for(j=1; j<=NQ2; j++) om[j+NQ1][i]=ksii[i][j];
    }
   free_dmatrix(INPH1,1,NQ2,1,NQ2); free_dmatrix(invPi0, 1, NQ1, 1, NQ1);
   free_dmatrix(ksii,1,NN,1,NQ2);  free_dvector(omi, 1, NQ);	free_dvector(xxi,1,NX);   
} //end init()



// pomegai() calculates log-probability log-p(omega_i|y_{io},...)
// th=(etai, xi, H(ksii));  gm=(Pi, B, Gamma);  fo = inv(Phi);  xi=x_i;
// Psi=diag(Psi_epsilon); Psd=diag(Psi_zeta);
double pomegai(double **Am, double *ci, double **Lmd,double *omi,double *yi,double *xi,double *Psi,double **Phi,double **gm,double *Psd)
{  
   int i,j;
   double *ksii,*th,**fo;
   double temp,tempp,tp;

   ksii=dvector(1,NQ2);  th=dvector(1,NST);   fo=dmatrix(1,NQ2,1,NQ2); 
   temp=0.0;
   for(i=1;i<=NP;i++){
      tp=yi[i];
	  for(j=1;j<=Nr;j++) tp-=Am[i][j]*ci[j];
      for(j=1;j<=NQ;j++) tp-=Lmd[i][j]*omi[j]; 
	  temp+=tp*tp/Psi[i];
   }
   for(i=1;i<=NQ2;i++)  ksii[i]=omi[NQ1+i]; 
   gxi2(th, xi, omi);
   for(i=1;i<=NQ1;i++){
     tempp=omi[i]; 
     for(j=1;j<=NST;j++)  tempp-=gm[i][j]*th[j];
     temp+=tempp*tempp/Psd[i];   
   }  

   for(i=1;i<=NQ2;i++){
	   for(j=1;j<=NQ2;j++)  fo[i][j]=Phi[i][j];  
   } 
   invv(fo,NQ2); 
   for(i=1;i<=NQ2;i++){
     for(j=1;j<=NQ2;j++)  temp+=ksii[i]*ksii[j]*fo[i][j];
   }

   free_dvector(ksii,1,NQ2);  free_dvector(th,1,NST);   free_dmatrix(fo,1,NQ2,1,NQ2);
   return (-0.5*temp);
} // End of pomegai()


// pomegaistd() calculates log-probability log-p(omega_i|y_{io},...)
// th=(etai, xi, H(ksii));  gm=(Pi, B, Gamma);  fo = inv(Phi);  xi=x_i;
// Psi=diag(Psi_epsilon); Psd=diag(Psi_zeta);
double pomegaistd(double **Am, double *ci, double **Lmd,double *omi,double *yi,double *xi,double *Psi,double **Phi,double **gm,double *Psd, double *mstd, double *sstd)
{  
   int i,j;
   double *ksii,*th,**fo;
   double temp,tempp,tp;

   ksii=dvector(1,NQ2);  th=dvector(1,NST);   fo=dmatrix(1,NQ2,1,NQ2); 
   temp=0.0;
   for(i=1;i<=NP;i++){
      tp=yi[i];
	  for(j=1;j<=Nr;j++) tp-=Am[i][j]*ci[j];
      for(j=1;j<=NQ;j++) tp-=Lmd[i][j]*omi[j]; 
	  temp+=tp*tp/Psi[i];
   }
   for(i=1;i<=NQ2;i++)  ksii[i]=omi[NQ1+i]; 
   gxi2(th, xi, omi);
   for(i=1; i<=NST; i++)
	   th[i] = (th[i]-mstd[i])/sstd[i];
   for(i=1;i<=NQ1;i++){
     tempp=omi[i]; 
     for(j=1;j<=NST;j++)  tempp-=gm[i][j]*th[j];
     temp+=tempp*tempp/Psd[i];   
   }  

   for(i=1;i<=NQ2;i++){
	   for(j=1;j<=NQ2;j++)  fo[i][j]=Phi[i][j];  
   } 
   invv(fo,NQ2); 
   for(i=1;i<=NQ2;i++){
     for(j=1;j<=NQ2;j++)  temp+=ksii[i]*ksii[j]*fo[i][j];
   }

   free_dvector(ksii,1,NQ2);  free_dvector(th,1,NST);   free_dmatrix(fo,1,NQ2,1,NQ2);
   return (-0.5*temp);
} // End of pomegaistd()


// omiprop() proposes a new omega_i as oldomi plus a multinor() draw.
// The proposal covariance is s2omi * inv(Vomg), where Vomg (NQ x NQ) is
// assembled below from blocks:
//   top-left  (NQ1 x NQ1) : Pi0' diag(1/Psd) Pi0,        Pi0 = I - Pi
//   off-diag  (NQ1 x NQ2) : Pi0' V2,                     V2 = -Gamma Deltah / Psd (row-wise)
//   bottom-right (NQ2 x NQ2): sum_k V2[k][.]V2[k][.]Psd[k] + inv(Phi)
//   plus, on the whole matrix: Lmd' diag(1/Psi) Lmd
// propomi : output, 1-based, NQ entries
// oldomi  : current omega_i, 1-based, NQ entries
// gm      : (Pi, B, Gamma), NQ1 x NST; columns 1..NQ1 are Pi, columns NQ1+NX+1.. are Gamma
// Deltah  : NT x NQ2 matrix (allocated by the caller as dmatrix(1, NT, 1, NQ2))
void omiprop(double *propomi, double *oldomi, double s2omi, double **Lmd, double **Phi, double **gm, double *Psd, double *Psi, double **Deltah)
{
	int			i, j, k;
	double**	V0=dmatrix(1, NQ, 1, NQ);
	double**	V2=dmatrix(1, NQ1, 1, NQ2);
	double**	V3=dmatrix(1, NQ2, 1, NQ2);
	double**	invPhi=dmatrix(1, NQ2, 1, NQ2);
	double**	Vomg=dmatrix(1, NQ,	1, NQ);	 
	double**	Pi0=dmatrix(1, NQ1, 1, NQ1);
	double**	newomi=dmatrix(1, 1, 1, NQ);

   // Pi0 = I - Pi (Pi is the leading NQ1 x NQ1 block of gm)
   for(i=1; i<=NQ1; i++)
	   for(j=1; j<=NQ1; j++){
		   if(i==j) Pi0[i][i] = 1-gm[i][i];
		   else Pi0[i][j] = -gm[i][j];
	   }

	// Top-left block: Pi0' diag(1/Psd) Pi0
	for(i=1;i<=NQ1;i++){
		for(j=1;j<=NQ1;j++) {
			Vomg[i][j]=0.0;
			for(k=1; k<=NQ1; k++)
				Vomg[i][j] += Pi0[k][i]*Pi0[k][j]/Psd[k];
		}
	}  
   // V2 = -(Gamma * Deltah), each row i divided by Psd[i]
   for(i=1;i<=NQ1;i++){
       for(j=1;j<=NQ2;j++){
		   V2[i][j]=0.0;
		   for(k=1;k<=NT;k++)
			   V2[i][j] -= gm[i][k+NQ1+NX]*Deltah[k][j];
		   V2[i][j] /= Psd[i];
       }
   } 
   // Off-diagonal blocks: Pi0' V2 and its transpose
   for(i=1;i<=NQ1;i++){
	   for(j=1;j<=NQ2;j++){
			Vomg[i][j+NQ1]=0.0;
			for(k=1; k<=NQ1; k++){
				Vomg[i][j+NQ1] += Pi0[k][i]*V2[k][j];
			}
			Vomg[j+NQ1][i] = Vomg[i][j+NQ1];

	   }
   }
   for(i=1;i<=NQ2;i++){
       for(j=1;j<=NQ2;j++)
		   invPhi[i][j]=Phi[i][j];
   }	  
   invv(invPhi, NQ2);
   // V3: V2 already carries 1/Psd, so multiplying by Psd[k] leaves one factor 1/Psd[k]
   for(i=1;i<=NQ2;i++){
	   for(j=1;j<=NQ2;j++){
		   V3[i][j]=0.0;
		   for(k=1;k<=NQ1;k++)
			   V3[i][j] += V2[k][i]*V2[k][j]*Psd[k];
		   V3[i][j]+=invPhi[i][j];
	   }
   }   

   // V0 = Lmd' diag(1/Psi) Lmd (contribution of the measurement equation)
   for(i=1;i<=NQ;i++){
       for(j=1;j<=NQ;j++){
		   V0[i][j]=0.0;
		   for(k=1;k<=NP;k++)
			   V0[i][j]+=Lmd[k][i]*Lmd[k][j]/Psi[k];
	   }
   }    

   // Bottom-right block
   for(i=1;i<=NQ2;i++)
       for(j=1;j<=NQ2;j++)
		   Vomg[i+NQ1][j+NQ1]=V3[i][j];

   for(i=1;i<=NQ;i++){
       for(j=1;j<=NQ;j++) 
		   Vomg[i][j] += V0[i][j];
   }   
   // Invert in place and scale by the tuning constant s2omi
   invv(Vomg, NQ);
   for(i=1;i<=NQ;i++){
       for(j=1;j<=NQ;j++) 
		   Vomg[i][j] = Vomg[i][j]*s2omi;
   }   
   // One draw from multinor() with matrix Vomg, added to the current value
   multinor(1, NQ, Vomg, newomi);
   for(i=1;i<=NQ;i++)
	   propomi[i] = newomi[1][i]+oldomi[i];

	free_dmatrix(V0, 1, NQ, 1, NQ);
	free_dmatrix(V2, 1, NQ1, 1, NQ2);
	free_dmatrix(V3, 1, NQ2, 1, NQ2);
	free_dmatrix(invPhi, 1, NQ2, 1, NQ2);
	free_dmatrix(Vomg, 1, NQ, 1, NQ);	
	free_dmatrix(Pi0, 1, NQ1, 1, NQ1);
	free_dmatrix(newomi, 1, 1, 1, NQ);

} //End of omiprop()


// omiMHstd(): one Metropolis-Hastings step for omega_i (standardized version).
// A candidate is proposed by omiprop() around oldomi and accepted when
// log(s_xuni()) <= min(0, log p(candidate) - log p(oldomi)), with log p
// evaluated by pomegaistd().
// newomi receives the candidate on acceptance and a copy of oldomi otherwise.
// gm is used for the proposal, lmdom for the target density.
// Returns 1.0 if the candidate was accepted, 0.0 otherwise.
double omiMHstd(double *newomi, double *oldomi, double *yi, double *xi, double **Am, 
				double *ci, double **gm, double s2omi, double **Lmd, double *Psi, 
				double *Psd, double **Phi, double **lmdom, double **Deltah,
				double *mstd, double*sstd)
{
	int		k, taken;
	double	lpOld, lpNew, logRatio;
	double*	source;
	double*	cand = dvector(1, NQ);

	lpOld = pomegaistd(Am,  ci,  Lmd, oldomi, yi, xi, Psi, Phi, lmdom, Psd, mstd, sstd);
	omiprop(cand, oldomi, s2omi, Lmd, Phi, gm, Psd, Psi, Deltah);
	lpNew = pomegaistd(Am,  ci,  Lmd, cand, yi, xi, Psi, Phi, lmdom, Psd, mstd, sstd);
	logRatio = min(0.0, lpNew-lpOld);

	// accept / reject, then copy whichever state survives
	taken = (log(s_xuni())<=logRatio);
	source = taken ? cand : oldomi;
	for(k=1; k<=NQ; k++)
		newomi[k] = source[k];

	free_dvector(cand, 1, NQ);
	return taken ? 1.0 : 0.0;

} //End of omiMHstd()

// genME() draws the free entries of Lambda_yk = (A_k, Lambda_k) and the residual
// variance psi_k for one row k of the measurement equation (unpenalized case).
// lmdyk   : in/out, 1-based, Nr+NQ entries; only the positions listed in mindk
//           are overwritten, the others are treated as known
// ryk     : number of free entries in lmdyk (may be 0)
// mindk   : 1-based index vector, ryk entries, original position of each free entry
// yk      : 1-based response vector, NN entries
// gy      : (C^T, Omega^T)^T, (Nr+NQ) x NN
// psik0   : current value of psi_k (scales the covariance of the loading draw)
// mu0yk0, h0yk0 : prior mean and matrix, indexed by ORIGINAL positions
// a0, b0  : hyperparameters passed on to rgamma()
// Returns the new psi_k = 1/rgamma(a, b).
// Fix: the shape a = a0 + NN/2.0 now uses floating-point division; the former
// NN/2 truncated for odd NN when NN is an integer constant (genpszk0() already
// uses /2.0).
double genME(double *lmdyk, int ryk, int *mindk, double *yk, double **gy, double psik0, double *mu0yk0, double **h0yk0, double a0, double b0)
{
	int			i, j, ny = Nr+NQ;
	double		psik;

	if(ryk>0){
		double*		ykstar = dvector(1, NN);
		double*		muyk = dvector(1, ryk);
		double*		mu0yk = dvector(1, ryk);
		double**	hyk = dmatrix(1, ryk, 1, ryk);
		double**	h0yk = dmatrix(1, ryk, 1, ryk);
		double**	gyk = dmatrix(1, ryk, 1, NN);
		double**	gykT = dmatrix(1, NN, 1, ryk);
		double**	invhyk = dmatrix(1, ryk, 1, ryk);
		double**	invh0yk = dmatrix(1, ryk, 1, ryk);
		double*		temp = dvector(1, ryk);
		double*		temp1 = dvector(1, ryk);
		double*		temp2 = dvector(1, ryk);
		double**	lmdykstar = dmatrix(1, 1, 1, ryk);
		double**	Slmdyk = dmatrix(1, ryk, 1, ryk);
		int*		mindk1 = zeroivector(ny);
		int*		mindk2 = zeroivector(ny-ryk);

		// mindk1 flags the free positions; mindk2 lists the remaining (known) ones
		for(i=1; i<=ryk; i++)
			mindk1[mindk[i]] = 1;
		j=0;
		for(i=1; i<=ny; i++){
			if(!mindk1[i]){
				j++;
				mindk2[j] = i; 
			}
		}

		// ykstar = yk minus the contribution of the known entries;
		// gyk = rows of gy that belong to the free entries
		for(i=1; i<=NN; i++){
			ykstar[i] = yk[i];
			for(j=1; j<=ny-ryk; j++)
				ykstar[i] -= lmdyk[mindk2[j]]*gy[mindk2[j]][i];
			for(j=1; j<=ryk; j++)
				gyk[j][i] = gy[mindk[j]][i];
		}
		// prior restricted to the free entries
		for(j=1; j<=ryk; j++){
			mu0yk[j] = mu0yk0[mindk[j]];
			for(i=1; i<=ryk; i++)
				h0yk[j][i] = h0yk0[mindk[j]][mindk[i]];
		}

		// update Lambda_yk
		// hyk = inv(gyk gyk^T + inv(h0yk)),  muyk = hyk (inv(h0yk) mu0yk + gyk ykstar)
		dmatrixtranspose(gyk, ryk, NN, gykT);
		dmatrixmultiply(gyk, ryk, NN, gykT, NN, ryk, invhyk);
		dmatrixeq(h0yk, ryk, ryk, invh0yk);
		invv(invh0yk, ryk);
		for(i=1; i<=ryk; i++)
			for(j=1; j<=ryk; j++){
				invhyk[i][j] += invh0yk[i][j];
				hyk[i][j] = invhyk[i][j];
			}
		invv(hyk, ryk);
		dmatrixmultiply(invh0yk, ryk, ryk, mu0yk, temp1);
		dmatrixmultiply(gyk, ryk, NN, ykstar, temp2);
		for(i=1; i<=ryk; i++)
			temp[i] = temp1[i]+temp2[i];

		dmatrixmultiply(hyk, ryk, ryk, temp, muyk);
		for(i=1; i<=ryk; i++){
			for(j=1; j<=ryk; j++)
				Slmdyk[i][j] = hyk[i][j]*psik0;
		}

		// draw the deviation with matrix psik0*hyk, then shift by muyk
		multinor(1, ryk, Slmdyk, lmdykstar);
		for(i=1; i<=ryk; i++){
			lmdykstar[1][i] += muyk[i];
			lmdyk[mindk[i]] = lmdykstar[1][i];
		}

		// update psi_k
		// b = b0 + (ykstar'ykstar + mu0' inv(h0) mu0 - muyk' inv(hyk) muyk)/2
		double		temp3 = 0.0, temp4 = 0.0, temp5 = 0.0, a, b;
		for(i=1; i<=ryk; i++){
			temp3 += mu0yk[i]*temp1[i];
			temp4 += muyk[i]*temp[i];
		}
		for(i=1; i<=NN; i++)
			temp5 += ykstar[i]*ykstar[i];
		
		a = a0 + NN/2.0;
		b = b0 + .5*(temp5+temp3-temp4);
		psik = 1/(rgamma(a, b));

		free_dvector(ykstar, 1, NN);
		free_dvector(muyk, 1, ryk);
		free_dvector(mu0yk, 1, ryk);
		free_dmatrix(hyk, 1, ryk, 1, ryk);
		free_dmatrix(h0yk, 1, ryk, 1, ryk);
		free_dmatrix(gyk, 1, ryk, 1, NN);
		free_dmatrix(gykT, 1, NN, 1, ryk);
		free_dmatrix(invhyk, 1, ryk, 1, ryk);
		free_dmatrix(invh0yk, 1, ryk, 1, ryk);
		free_dvector(temp, 1, ryk);
		free_dvector(temp1, 1, ryk);
		free_dvector(temp2, 1, ryk);
		free_dmatrix(lmdykstar, 1, 1, 1, ryk);
		free_dmatrix(Slmdyk, 1, ryk, 1, ryk);		
		free_ivector(mindk1, 1, ny);
		free_ivector(mindk2, 1, ny-ryk);

	}else{
		// No free loading in this row: only psi_k is drawn, from the full residuals
		double a, b, temp;
		double *temp1 = dvector(1, NN);
		temp = 0.0;
		for(i=1; i<=NN; i++){
			temp1[i] = yk[i]; 
			for(j=1; j<=ny; j++)
				temp1[i] -= gy[j][i]*lmdyk[j];
			temp += temp1[i]*temp1[i];
		}
		a = a0 + NN/2.0;
		b = b0 + .5*temp;
		psik = 1/(rgamma(a, b));
		free_dvector(temp1, 1, NN);
	}

	return	psik;

} //End genME()


// genbeta0k() draws the constant term beta0k of structural equation k.
// mean = average over the NN observations of
//        etak[i] - sum_{j=1..NST-1} lmdomk0[j]*gom0[j][i]
// sd   = sqrt(pszk/NN);  the draw is mean + gasdev()*sd.
// lmdomk0 (NST-1 entries) and gom0 ((NST-1) x NN) are the coefficient vector
// and regressor matrix with the constant term removed.
double genbeta0k(double pszk, double *etak, double *lmdomk0, double **gom0)
{
	int		obs, term;
	double	fitted;
	double	center = 0.0;	// accumulates residuals, then becomes their mean
	double	spread = sqrt(pszk/(1.0*NN));

	for(obs=1; obs<=NN; obs++){
		fitted = 0.0;
		for(term=1; term<=NST-1; term++)
			fitted += lmdomk0[term]*gom0[term][obs];
		center += (etak[obs]-fitted);
	}
	center /= (1.0*NN);

	return center+gasdev()*spread;

} // End genbeta0k()



// genLMDomk0() draws Lambda_omegak = (Pi_k, B_k, Gamma_k) for structural equation k.
// This version does not penalize the constant term in the SE: the constant sits
// at position NQ1+NX+1 and is drawn separately through genbeta0k().
// lmdomk  : in/out, 1-based, NST entries; on return entries 1..NQ1 (Pi_k) are 0,
//           entry NQ1+NX+1 is the new constant, the others are the new draw
// omk     : n*1 vector (eta for equation k), 1-based, NN entries
// go      : (Eta, X, H) in SE, an NST x NN matrix
// dlmdomk : diagonal elements of D_lambdaomk, NST entries; dlmdomk[1:NQ1] = 0
// pszk    : Psi_{zeta k}
// Local arrays with suffix "0" have NST-1 entries: the constant term is removed
// and every later index is shifted down by one.
void genLMDomk0(double *lmdomk, double *omk, double **go, double *dlmdomk, double pszk)
{
	int			i, j, nom=NX+NT;
	double		beta0k;
	double*		lmdomk0 = dvector(1, NST-1);
	double*		dlmdomk0 = dvector(1, NST-1);
	double**	go0 = dmatrix(1, NST-1, 1, NN);
	double**	go0T = dmatrix(1, NN, 1, NST-1);
	double**	invSlmdomk0 = dmatrix(1, NST-1, 1, NST-1);
	double**	temp = dmatrix(1, NST-1, 1, NN);
	double*		mulmdomk0 = dvector(1, NST-1);
	double**	Slmdomk0 = zerodmatrix(NST-1, NST-1);

	// Copy everything except position NQ1+NX+1 (the constant) into the "0" arrays
	for(j=1; j<=NST; j++){
		if(j<=NQ1+NX){
			lmdomk0[j] = lmdomk[j];
			dlmdomk0[j] = dlmdomk[j];
			for(i=1; i<=NN; i++)
				go0[j][i] = go[j][i];
		}else if(j>NQ1+NX+1){
			lmdomk0[j-1] = lmdomk[j];
			dlmdomk0[j-1] = dlmdomk[j];
			for(i=1; i<=NN; i++)
				go0[j-1][i] = go[j][i];
		}
	}
	// invSlmdomk0 = go0 go0^T
	dmatrixtranspose(go0, NST-1, NN, go0T);
	dmatrixmultiply(go0, NST-1, NN, go0T, NN, NST-1, invSlmdomk0);
	// Constant term, drawn given the current (old) coefficients
	beta0k = genbeta0k(pszk, omk, lmdomk0, go0);

	lmdomk[NQ1+NX+1] = beta0k;
	// Slmdomk1 = inv( (go0 go0^T restricted to the non-eta terms) + diag(1/d) ),
	// a (nom-1) x (nom-1) matrix; the first NQ1 rows/columns (Pi_k) are skipped
	double**	Slmdomk1 = dmatrix(1, nom-1, 1, nom-1);
	double*		mulmdomk1 = dvector(1, nom-1);
	for(i=1; i<=nom-1; i++)
		for(j=1; j<=nom-1; j++)
			Slmdomk1[i][j] = invSlmdomk0[i+NQ1][j+NQ1] + (1.0/dlmdomk0[i+NQ1])*(i==j);
	invv(Slmdomk1, nom-1);

	// Embed Slmdomk1 into the zero matrix Slmdomk0 so that the Pi_k rows stay zero
	for(i=1; i<=nom-1; i++)
		for(j=1; j<=nom-1; j++)
			Slmdomk0[i+NQ1][j+NQ1] = Slmdomk1[i][j];
	// mulmdomk0 = Slmdomk0 * go0 * (omk - beta0k)
	dmatrixmultiply(Slmdomk0, NST-1, NST-1, go0, NST-1, NN, temp);
	for(i=1; i<=NST-1; i++){
		mulmdomk0[i] = 0.0;
		for(j=1; j<=NN; j++)
			mulmdomk0[i] += temp[i][j]*(omk[j]-beta0k);
	}
	// Scale the matrix by pszk for the draw (mulmdomk1 is filled but never read afterwards)
	for(i=1; i<=nom-1; i++){
		mulmdomk1[i] = mulmdomk0[i+NQ1];
		for(j=1; j<=nom-1; j++)
			Slmdomk1[i][j] = Slmdomk1[i][j]*pszk;
	}
	double**	lmdomk01 = dmatrix(1, 1, 1, nom-1);
	multinor(1, nom-1, Slmdomk1, lmdomk01);
	// Write back: Pi_k = 0; other entries = draw + mean, undoing the index shift;
	// position NQ1+NX+1 keeps beta0k assigned above
	for(i=1; i<=NST; i++){
		if(i<=NQ1)	lmdomk[i] = 0.0;
		else if(i<=NQ1+NX)	
			lmdomk[i] = lmdomk01[1][i-NQ1]+mulmdomk0[i];
		else if(i>NQ1+NX+1)
			lmdomk[i] = lmdomk01[1][i-NQ1-1]+mulmdomk0[i-1];
	}

	free_dmatrix(Slmdomk1, 1, nom-1, 1, nom-1);
	free_dvector(mulmdomk1, 1, nom-1);
	free_dmatrix(lmdomk01, 1, 1, 1, nom-1);
	free_dvector(mulmdomk0, 1, NST-1);  free_dmatrix(go0, 1, NST-1, 1, NN); free_dmatrix(go0T, 1, NN, 1, NST-1);
	free_dmatrix(Slmdomk0, 1, NST-1, 1, NST-1);  free_dmatrix(invSlmdomk0, 1, NST-1, 1, NST-1);
	free_dmatrix(temp, 1, NST-1, 1, NN);	 free_dvector(lmdomk0, 1, NST-1);	free_dvector(dlmdomk0, 1, NST-1);

} //End genLMDomk0()


// genDlmdomk0() refreshes dlmdomk, the diagonal of D_lambdaomk, for equation k.
//   entries 1..NQ1 (Pi_k)              : set to 0
//   entries NQ1+1..NQ1+NX (B_k)        : 1/inv_gaussian(sqrt((lbk/coef)^2*pszk), lbk^2)
//   entry   NQ1+NX+1 (constant)        : set to 0 (not penalized)
//   entries NQ1+NX+2..NQ1+NX+NT (Gamma): 1/inv_gaussian(sqrt((lgk/coef)^2*pszk), lgk^2)
// lmdomk holds the current coefficients (NST entries); lbk and lgk are the lasso
// parameters of B_k and Gamma_k. lpk (lambda_Pik) is accepted but not used.
void genDlmdomk0(double *dlmdomk, double *lmdomk, double pszk, double lpk, double lbk, double lgk)
{
	int		t;
	double	ratio, igMean, igShape;

	for(t=1; t<=NQ1; t++)
		dlmdomk[t] = 0.0;

	// covariate coefficients B_k
	for(t=NQ1+1; t<=NQ1+NX; t++){
		ratio = lbk/lmdomk[t];
		igMean = sqrt(ratio*ratio*pszk);
		igShape = lbk*lbk;
		dlmdomk[t] = 1.0/(inv_gaussian(igMean, igShape));
	}

	// basis coefficients Gamma_k, skipping the constant at NQ1+NX+1
	for(t=NQ1+NX+2; t<=NQ1+NX+NT; t++){
		ratio = lgk/lmdomk[t];
		igMean = sqrt(ratio*ratio*pszk);
		igShape = lgk*lgk;
		dlmdomk[t] = 1.0/(inv_gaussian(igMean, igShape));
	}

	dlmdomk[NQ1+NX+1] = 0.0;
} //End of genDlmdomk0()


// genpszk0() draws psz_k as 1/rgamma(a, b) with
//   a = a0 + (NN+NX+NT)/2
//   b = b0 + ( sum_i resid_i^2 + sum_j lmdomk[j]^2/dlmdomk[j] ) / 2
// where resid_i = omk[i] - sum_{j=1..NST} go[j][i]*lmdomk[j] and the second sum
// runs over j = NQ1+1..NST except the constant term at NQ1+NX+1.
// omk has NN entries, go is NST x NN, lmdomk and dlmdomk have NST entries.
double genpszk0(double a0, double b0, double *omk, double **go, double *lmdomk, double *dlmdomk)
{
	int		obs, term;
	double	shape, rate, draw;
	double	rss = 0.0;		// residual sum of squares
	double	penalty = 0.0;	// coefficient part
	double*	resid = dvector(1, NN);

	shape = a0 + (NN+NX+NT)/2.0;
	rate = b0;

	for(obs=1; obs<=NN; obs++){
		resid[obs] = omk[obs];
		for(term=1; term<=NST; term++)
			resid[obs] -= go[term][obs]*lmdomk[term];
	}
	for(obs=1; obs<=NN; obs++)
		rss += resid[obs]*resid[obs];

	for(term=NQ1+1; term<=NST; term++){
		if(term==NQ1+NX+1)
			continue;	// constant term carries no penalty
		penalty += lmdomk[term]*(1.0/dlmdomk[term])*lmdomk[term];
	}

	rate += (rss+penalty)/2.0;
	draw = 1.0/(rgamma(shape, rate));

	free_dvector(resid, 1, NN);

	return draw;
} //End of genpszk0




// genPhi() draws Phi through in_wish() (inverse Wishart).
// Omg2 : NQ2 x NN matrix of the current ksi values
// R0   : NQ2 x NQ2 prior matrix, r0 : prior degrees of freedom
// Phi  : output, NQ2 x NQ2
// in_wish() is called with NN+r0 and the matrix Omg2 Omg2^T + R0.
void genPhi(double **Phi, double **R0, int r0, double **Omg2)
{
	double **sumSq=dmatrix(1, NQ2, 1, NQ2);
	double **ksiT=dmatrix(1, NN, 1, NQ2);
	int		r, c;

	// sumSq = Omg2 * Omg2^T
	dmatrixtranspose(Omg2, NQ2, NN, ksiT);
	dmatrixmultiply(Omg2, NQ2, NN, ksiT, NN, NQ2, sumSq);

	for(r=1; r<=NQ2; r++){
		for(c=1; c<=NQ2; c++)
			sumSq[r][c] += R0[r][c];
	}

	in_wish(NQ2, NN+r0, sumSq, Phi);

	free_dmatrix(sumSq, 1, NQ2, 1, NQ2);
	free_dmatrix(ksiT, 1, NN, 1, NQ2);

} //End of genPhi()



// lassolmdk1() updates the lasso parameters lmd = (lpk, lbk, lgk) of equation k.
//   lmd[1] = 0 (no penalty on Pi_k)
//   lmd[2] = sqrt( rgamma(NX + r0[2],     sum of dlmdomk over B_k / 2     + dt0[2]) )
//   lmd[3] = sqrt( rgamma(NT + r0[3] - 1, sum of dlmdomk over Gamma_k / 2 + dt0[3]) )
// The Gamma_k sum skips the constant term (position NQ1+NX+1), whose lambda is 0.
// r0 and dt0 are 3*1 vectors of given hyperparameters; dlmdomk has NST entries.
void lassolmdk1(double *lmd, double *r0, double *dt0, double *dlmdomk)
{
	int			t;
	const int	nB = NX;	// number of B_k coefficients
	const int	nG = NT;	// number of Gamma_k coefficients, constant included
	double*		tauSum=zerodvector(3);	// [2]: B_k total, [3]: Gamma_k total

	for(t=NQ1+1; t<=NQ1+NX; t++)
		tauSum[2] += dlmdomk[t];
	for(t=NQ1+NX+2; t<=NQ1+NX+NT; t++)
		tauSum[3] += dlmdomk[t];

	lmd[1] = 0.0;
	lmd[2] = sqrt(rgamma(1.0*nB+r0[2], tauSum[2]/2+dt0[2]));
	lmd[3] = sqrt(rgamma(1.0*nG+r0[3]-1, tauSum[3]/2+dt0[3]));

	free_dvector(tauSum, 1, 3);
} //End of lassolmdk1()



// OneStepMCMCstd() performs one sweep of the MCMC sampler (ME unpenalized,
// SE basis functions standardized). All arrays are 1-based and updated in place.
//   gy       (Nr+NQ) x NN, rows 1..Nr = C, rows Nr+1.. = Omega (updated in step 1)
//   lmdy     NP x (Nr+NQ) = (A, Lambda);  mindex[k], ry[k]: free entries of row k
//   psi      NP residual variances of the ME
//   lmdom    NQ1 x NST = (Pi, B, Gamma);  dlmdom NQ1 x NST diagonal of D_lambda
//   psz      NQ1 residual variances of the SE;  Phi NQ2 x NQ2
//   Y        NP x NN;  X NX x NN;  Deltah NT x NQ2
//   lassolmd NQ1 x 3 lasso parameters
//   mu0y, h0y, a0psi, b0psi, a0psz, b0psz, lassor0, lassodt0, R0, r0: hyperparameters
//   s2omi    proposal scale for omega_i;  accept[i] receives 1.0/0.0 per observation
//   mstd, sstd: NST entries each, passed to scale() and pomegaistd()
//   penalize non-zero: also update D_lambda (3.2) and the lasso parameters (3.4)
// Fix: step 3.2 now passes psz[k] to genDlmdomk0(), whose third parameter is the
// structural-equation variance pszk; psi[k] (a measurement-equation variance)
// was passed before.
void OneStepMCMCstd(double **gy, double **lmdy, int **mindex, int *ry, double *psi, double **lmdom, 
				 double **dlmdom, double *psz, double **Phi, 
				 double **Y, double **lassolmd, double **X, double **Deltah, 
				 double **mu0y, double ***h0y, double *a0psi, double *b0psi, 
				 double *a0psz, double *b0psz,
				 double *lassor0, double *lassodt0, double **R0, int r0,
				 double s2omi, double *accept, double* mstd, double *sstd, int penalize)
{
	int			i, j, k, ny=Nr+NQ;
	double		oldpsik; 
	double*		xi = dvector(1, NX);
	double*		yi = dvector(1, NP);
	double*		oldomi = dvector(1, NQ);
	double*		newomi = dvector(1, NQ);
	double*		ci = dvector(1, Nr);

	double*		lmdyk = dvector(1, ny);
	double*		yk = dvector(1, NN);
	double**	Am = dmatrix(1, NP, 1, Nr);
	double**	Lmd = dmatrix(1, NP, 1, NQ);

	double*		etak = dvector(1, NN);
	double*		lmdomk = dvector(1, NST);
	double*		dlmdomk = dvector(1, NST);
	double*		gomi = dvector(1, NST);
	double**	gom = dmatrix(1, NST, 1, NN);

	// Split lmdy = (A, Lambda) for the omega_i update
	for(i=1; i<=NP; i++){
		for(j=1; j<=Nr; j++)
			Am[i][j] = lmdy[i][j];
		for(j=1; j<=NQ; j++)
			Lmd[i][j] = lmdy[i][j+Nr];
	}

	// (1). Update omega_i, one Metropolis-Hastings step per observation
	for(i=1; i<=NN; i++){
		for(j=1; j<=Nr; j++)	ci[j] = gy[j][i];
		for(j=1; j<=NX; j++)	xi[j] = X[j][i];
		for(j=1; j<=NP; j++)	yi[j] = Y[j][i];
		for(j=1; j<=NQ; j++){
			oldomi[j] = gy[j+Nr][i];	
			newomi[j] = gy[j+Nr][i];
		}
		// lmdom is passed both as gm (proposal) and as lmdom (target density)
		accept[i] = omiMHstd(newomi, oldomi, yi, xi, Am, ci, lmdom, s2omi, Lmd, psi, psz, Phi, lmdom, Deltah, mstd, sstd);
		for(j=1; j<=NQ; j++)
			gy[j+Nr][i] = newomi[j];
	} // for i, 

	// (2). Update theta_1=(A, Lambda, Psi_epsilon), row by row
	for(k=1; k<=NP; k++){
		for(j=1; j<=ny; j++)
			lmdyk[j] = lmdy[k][j];
		for(j=1; j<=NN; j++)	yk[j] = Y[k][j];
		oldpsik = psi[k];
		psi[k] = genME(lmdyk, ry[k], mindex[k], yk, gy, oldpsik, mu0y[k], h0y[k], a0psi[k], b0psi[k]);
		for(j=1; j<=ny; j++)	lmdy[k][j] = lmdyk[j];
	} // for k, theta1

	// (3). Update theta_2=(Pi, B, Gamma, Phi, Psi_zeta)
	// Rebuild the regressor matrix G = (eta, x, H(ksi)) from the new omega_i,
	// then pass it through scale() with mstd/sstd
	for(i=1; i<=NN; i++){
		for(j=1; j<=NX; j++)
			xi[j] = X[j][i];
		for(j=1; j<=NQ; j++)
			newomi[j] = gy[j+Nr][i];
		gxi2(gomi, xi, newomi);
		for(j=1; j<=NST; j++)
			gom[j][i] = gomi[j];
	}
	scale(gom, gom, NST, NN, mstd, sstd, 0);

	for(k=1; k<=NQ1; k++){
		// 3.1 update Lambda_omegak = (Pi_k, B_k, Gamma_k)
		for(j=1; j<=NST; j++){
			lmdomk[j] = lmdom[k][j];
			dlmdomk[j] = dlmdom[k][j];
		}
		for(i=1; i<=NN; i++)
			etak[i] = gy[Nr+k][i];
		genLMDomk0(lmdomk, etak, gom, dlmdomk, psz[k]);

		for(j=1; j<=NST; j++)
			lmdom[k][j] = lmdomk[j];

		if(penalize){
			// 3.2 update dlmdomk = diag(D_Lambdaomk), using the SE variance psz[k]
			genDlmdomk0(dlmdomk, lmdomk, psz[k], lassolmd[k][1], lassolmd[k][2], lassolmd[k][3]);
			for(j=1; j<=NST; j++)	dlmdom[k][j] = dlmdomk[j];
		}

		// 3.3 update pszk = Psz[k][k]
		psz[k] = genpszk0(a0psz[k], b0psz[k], etak, gom, lmdomk, dlmdomk);

		if(penalize){
			// 3.4 Update Lasso parameters lambda
			lassolmdk1(lassolmd[k], lassor0, lassodt0, dlmdomk);
		}
	} // for k, theta2

	// (4). Update Phi from the ksi rows of gy
	double **Omg2 = dmatrix(1, NQ2, 1, NN);
	for(i=1; i<=NQ2; i++)
		for(j=1; j<=NN; j++)
			Omg2[i][j] = gy[i+Nr+NQ1][j];
	genPhi(Phi, R0, r0, Omg2);
	free_dmatrix(Omg2, 1, NQ2, 1, NN);


	free_dvector(xi, 1, NX);
	free_dvector(yi, 1, NP);
	free_dvector(oldomi, 1, NQ);
	free_dvector(newomi, 1, NQ);
	free_dvector(ci, 1, Nr);
	free_dvector(lmdyk, 1, ny);
	free_dvector(yk, 1, NN);
	free_dmatrix(Am, 1, NP, 1, Nr);
	free_dmatrix(Lmd, 1, NP, 1, NQ);
	free_dvector(etak, 1, NN);
	free_dvector(lmdomk, 1, NST);
	free_dvector(dlmdomk, 1, NST);
	free_dvector(gomi, 1, NST);
	free_dmatrix(gom, 1, NST, 1, NN);

} //End of OneStepMCMCstd()


// savestd() writes the two vectors mstd and sstd to the text file "meanstd.txt".
// Line i of the file (i = 1, ..., NST) holds mstd[i] followed by sstd[i], each
// printed with "%10.4f" and followed by a tab.
// mstd, sstd: 1-based vectors with at least NST entries.
// If the file cannot be opened, an error is reported on stderr and nothing is written.
void savestd(double *mstd, double *sstd)
{
	int	i;
	// The name is a fixed literal, so no heap buffer is needed
	// (the former new char[100] buffer was never released).
	const char*	filename = "meanstd.txt";
	FILE*	out = fopen(filename, "w");

	if(out == NULL){
		fprintf(stderr, "\n Error in savestd(): cannot open %s for writing!\n", filename);
		return;
	}
	for(i=1; i<=NST; i++){
		fprintf(out, "%10.4f \t", mstd[i]);
		fprintf(out, "%10.4f \t", sstd[i]);
		fprintf(out, "\n");
	}
	fclose(out);
}









/////////////////////////////////////////////////
//	Functions for SEM Spline Version
/////////////////////////////////////////////////

// Calculate the natural cubic spline basis matrix,
// following Hastie et al. (2009), eq. 5.4:
// N_1(x) = 1, N_2(x) = x, N_{k+2}(x) = d_k(x) - d_{K-1}(x), k = 1, ..., K-2, where K = # of knots.
// xv is the N*1 input vector; mbasis is the N*K output matrix holding the K basis functions.
// dk() is the helper function d_k used in the natural cubic spline basis.

// dk() evaluates the helper function of the natural cubic spline basis,
//   d_k(x) = ( (x-kk)_+^3 - (x-kK)_+^3 ) / (kK - kk),
// where kk is the k-th knot and kK is the last knot.
// x:  evaluation point
// kk: knot k, must be strictly smaller than kK
// kK: last knot
// Returns d_k(x). If kk >= kK an error message is printed and 0.0 is returned
// (previously the return value was left uninitialised in that case).
double dk(double x, double kk, double kK)
{
	double dkx = 0.0;	// defined fallback for the error branch
	if(kk>=kK)
		printf("\n Error in dk(): kk must be smaller than kK!\n");
	else
		dkx = (nonneg(pow(x-kk, 3)) - nonneg(pow(x-kK, 3)))/(kK-kk);
	return dkx; 
}

void ns(double **mbasis, double *xv, double *knots, int N, int K)
{
	int i, k;
	double dkK1;
	
	for(i=1; i<=N; i++){
		mbasis[i][1] = 1.0;
		mbasis[i][2] = xv[i];
		dkK1 = dk(xv[i], knots[K-1], knots[K]);
		for(k=1; k<=K-2; k++){
			mbasis[i][k+2] = dk(xv[i], knots[k], knots[K]) - dkK1; 
		}
	}

} // End of ns()


// basis() calculates basis matrix of all the basis function at given latent variables
// returns a N*NT matrix, NT = total # of basis functions
// This is for f1(ksi1)+f2(ksi2)+f3(ksi1, ksi2) case, NQ=2
// knots is a NQ2*K matrix
// N is the number of sample of ksi, could be 1, 2, ..., NN.
void basis(double **nsbasis, double **ksi, double **knots, int N, int K)
{
	int i, j, k;
	double *ksi1=dvector(1, N);
	double *ksi2=dvector(1, N);
	double **n1basis=dmatrix(1, N, 1, K);
	double **n2basis=dmatrix(1, N, 1, K);

	for(i=1; i<=N; i++){
		ksi1[i] = ksi[i][1];
		ksi2[i] = ksi[i][2];
	} 
	ns(n1basis, ksi1, knots[1], N, K);
	ns(n2basis, ksi2, knots[2], N, K);

	for(i=1; i<=N; i++){
		for(j=1; j<=K; j++){
			nsbasis[i][j] = n1basis[i][j];
			if(j<K)	nsbasis[i][j+K] = n2basis[i][j+1];
			if(j>=2){
				for(k=2; k<=K; k++)
					nsbasis[i][2*K-1+(j-2)*(K-1)+k-1] = n1basis[i][j]*n2basis[i][k];
			} //if
		}//for j
	} // for i
	free_dvector(ksi1, 1, N);	free_dvector(ksi2, 1, N); 
	free_dmatrix(n1basis, 1, N, 1, K);		free_dmatrix(n2basis, 1, N, 1, K); 
} // End of basis()


// createknots() picks K knots per variable from the order statistics of the sample xv.
// xv: nv*N matrix (1-based), one row per variable (nv = 1 for a single vector)
// K:  number of knots per variable
// N:  number of observations per variable
// nv: number of variables
// Returns a newly allocated nv*K matrix (dmatrix(1, nv, 1, K)); the caller releases it.
// indexx() fills the index vector for each row — presumably the ascending-order
// permutation as in Numerical Recipes; confirm against its definition.
double** createknots(double **xv, int K, int N, int nv)
{
	int*	order = ivector(1, N);
	double**	knots = dmatrix(1, nv, 1, K);

	// N/(K+1) is an integer division, so it is already a whole number
	// (wrapping it in round() would not change the value).
	const int	step = N/(K+1);
	// fall back to a shorter stride when K*step is within 5 of N or beyond it
	const int	stride = (K*step < N-5) ? step : step-1;

	for(int v=1; v<=nv; v++){
		indexx(N, xv[v], order);
		for(int q=1; q<=K; q++)
			knots[v][q] = xv[v][order[q*stride]];
	}
	free_ivector(order, 1, N);
	return knots; 
} // End of createknots()

// dNk() evaluates the first derivative of every natural cubic spline basis
// function at x = 0.
// knots: 1-based vector of K knots
// Returns a newly allocated K*1 vector (dvector(1, K)); the caller releases it.
// Entry 1 (constant) is 0, entry 2 (linear term) is 1, and entry m (m = 3..K) is
//   3*(t[m-2] - t[K])/(knots[K]-knots[m-2]) - 3*(t[K-1] - t[K])/(knots[K]-knots[K-1]),
// with t[j] = knots[j]^2 when knots[j] < 0 and 0 otherwise.
double *dNk(double *knots, int K)
{
	double *deriv=dvector(1, K);

	deriv[1] = 0.0;
	deriv[2] = 1.0;
	if(K>=3){
		// pieces that do not depend on the basis index
		const double sqLast = knots[K]*knots[K]*(knots[K]<0.0);
		const double sqPrev = knots[K-1]*knots[K-1]*(knots[K-1]<0.0);
		const double tail = 3*(sqPrev-sqLast)/(knots[K]-knots[K-1]);

		for(int m=3; m<=K; m++){
			const double kn = knots[m-2];
			deriv[m] = 3*(kn*kn*(kn<0.0)-sqLast)/(knots[K]-kn) - tail;
		}
	}
	return deriv; 
} // End of dNk()

// Nk0() evaluates every natural cubic spline basis function at x = 0.
// knots: 1-based vector of K knots
// Returns a newly allocated K*1 vector (dvector(1, K)); the caller releases it.
// Entry 1 (constant) is 1, entry 2 (linear term) is 0, and entry m (m = 3..K) is
//   (c[m-2] - c[K])/(knots[K]-knots[m-2]) - (c[K-1] - c[K])/(knots[K]-knots[K-1]),
// with c[j] = nonneg((-knots[j])^3).
double* Nk0(double *knots, int K)
{
	double* value=dvector(1, K);

	value[1] = 1.0;
	value[2] = 0.0;
	if(K>=3){
		// pieces that do not depend on the basis index
		const double cubeLast = nonneg(pow(-knots[K], 3));
		const double tail = (nonneg(pow(-knots[K-1], 3))-cubeLast)/(knots[K]-knots[K-1]);

		for(int m=3; m<=K; m++)
			value[m] = (nonneg(pow(-knots[m-2], 3))-cubeLast)/(knots[K]-knots[m-2]) - tail;
	}
	return value;
} // End of Nk0()


// deltaH() fills dh with the first derivative of H(ksi) at ksi = 0 for the
// spline case with two latent variables and an interaction term.
// dh:    output matrix with 2 columns (derivative w.r.t. ksi1 and ksi2); rows are
//        1 .. K             : basis of ksi1
//        K+1 .. 2K-1        : basis of ksi2 without its constant
//        2K .. 2K-1+(K-1)^2 : interaction terms n1[a]*n2[b], a = 2..K (outer), b = 2..K (inner)
// knots: knots[1] and knots[2] are the K knots of ksi1 and ksi2
// K:     number of knots per latent variable
void deltaH(double **dh, double **knots, int K)
{
	double *val1 = Nk0(knots[1], K);
	double *val2 = Nk0(knots[2], K);
	double *der1 = dNk(knots[1], K);
	double *der2 = dNk(knots[2], K);
	int row;

	// main effect of ksi1: only the ksi1-derivative is non-zero
	for(row=1; row<=K; row++){
		dh[row][1] = der1[row];
		dh[row][2] = 0.0;
	}
	// main effect of ksi2 (constant column dropped): only the ksi2-derivative is non-zero
	for(int b=2; b<=K; b++){
		dh[K+b-1][1] = 0.0;
		dh[K+b-1][2] = der2[b];
	}
	// interaction terms: product rule evaluated at zero
	row = 2*K-1;
	for(int a=2; a<=K; a++){
		for(int b=2; b<=K; b++){
			++row;
			dh[row][1] = der1[a]*val2[b];
			dh[row][2] = val1[a]*der2[b];
		}
	}
	free_dvector(der1, 1, K);	free_dvector(der2, 1, K);
	free_dvector(val1, 1, K);	free_dvector(val2, 1, K);
} // End of deltaH()




//==========================================================================================================================

////////////////////////////////////////
// Functions for parametric nonlinear SE
////////////////////////////////////////


// OneStepMCMCpara() is one step in the MCMC to update unknown parameters
// (parametric nonlinear structural equation). For ME unpenalized case.
// Given hyperparameters: s2omi, a0psi, b0psi, a0psz, b0psz, lassor0, lassodt0, R0, r0.
//
// gy:       (Nr+NQ)*NN matrix; rows 1..Nr are read as c_i, rows Nr+1..Nr+NQ hold
//           the latent omega_i and are updated in place
// lmdy:     NP*(Nr+NQ) matrix (A, Lambda) of the measurement equation, updated in place
// mindex, ry, mu0y, h0y: per-row inputs forwarded unchanged to genME()
// psi:      NP vector of Psi_epsilon, updated in place
// lmdom:    NQ1*(NQ1+NX+nt) matrix (Pi, B, Gamma), updated in place
// dlmdom:   NQ1*(NQ1+NX+nt) diagonals of D_Lambdaom, updated only when penalize != 0
// psz:      NQ1 vector of Psi_zeta, updated in place
// Phi:      covariance matrix of xi, updated by genPhi()
// Y, X:     NP*NN and NX*NN data matrices
// lassolmd: per-equation Lasso parameters, updated only when penalize != 0
// Deltah:   derivative matrix forwarded to the proposal in omiMH()
// accept:   NN vector; accept[i] receives the value returned by omiMH() for sample i
// penalize: non-zero enables the Lasso updates (steps 3.2 and 3.4)
// nt:       number of terms in H(xi)
void OneStepMCMCpara(double **gy, double **lmdy, int **mindex, int *ry, double *psi, double **lmdom, 
				 double **dlmdom, double *psz, double **Phi, double **Y, double **lassolmd, double **X, 
				 double **Deltah, double **mu0y, double ***h0y, double *a0psi, double *b0psi, 
				 double *a0psz, double *b0psz, double *lassor0, double *lassodt0, 
				 double **R0, int r0, double s2omi, double *accept, int penalize, int nt)
{
	int			i, j, k, ny=Nr+NQ, nst=NQ1+NX+nt;
	double		oldpsik; 
	double*		xi = dvector(1, NX);
	double*		yi = dvector(1, NP);
	double*		oldomi = dvector(1, NQ);
	double*		newomi = dvector(1, NQ);
	double*		ci = dvector(1, Nr);

	double*		lmdyk = dvector(1, ny);
	double*		yk = dvector(1, NN);
	double**	Am = dmatrix(1, NP, 1, Nr);
	double**	Lmd = dmatrix(1, NP, 1, NQ);

	double*		etak = dvector(1, NN);
	double*		lmdomk = dvector(1, nst);
	double*		dlmdomk = dvector(1, nst);
	double*		gomi = dvector(1, nst);
	double**	gom = dmatrix(1, nst, 1, NN);

	// Split lmdy = (A, Lambda) into its two column groups for the omega_i update
	for(i=1; i<=NP; i++){
		for(j=1; j<=Nr; j++)
			Am[i][j] = lmdy[i][j];
		for(j=1; j<=NQ; j++)
			Lmd[i][j] = lmdy[i][j+Nr];
	}

	// (1). Update omega_i
	for(i=1; i<=NN; i++){
		for(j=1; j<=Nr; j++)	ci[j] = gy[j][i];
		for(j=1; j<=NX; j++)	xi[j] = X[j][i];
		for(j=1; j<=NP; j++)	yi[j] = Y[j][i];
		for(j=1; j<=NQ; j++){
			oldomi[j] = gy[j+Nr][i];	
			newomi[j] = gy[j+Nr][i];
		}
		accept[i] = omiMH(newomi, oldomi, yi, xi, Am, ci, lmdom, s2omi, Lmd, psi, psz, Phi, lmdom, Deltah, nt);
		for(j=1; j<=NQ; j++)
			gy[j+Nr][i] = newomi[j];
	} // for i, omi

	// (2). Update theta_1=(A, Lambda, Psi_epsilon)
	for(k=1; k<=NP; k++){
		for(j=1; j<=ny; j++)
			lmdyk[j] = lmdy[k][j];
		for(j=1; j<=NN; j++)	yk[j] = Y[k][j];
		oldpsik = psi[k];
		psi[k] = genME(lmdyk, ry[k], mindex[k], yk, gy, oldpsik, mu0y[k], h0y[k], a0psi[k], b0psi[k]);
		for(j=1; j<=ny; j++)	lmdy[k][j] = lmdyk[j];
	} // for k, theta1

	// (3). Update theta_2=(Pi, B, Gamma, Phi, Psi_zeta)
	// Build G = (eta, x, H(xi)) for every sample from the freshly updated omega
	for(i=1; i<=NN; i++){
		for(j=1; j<=NX; j++)
			xi[j] = X[j][i];
		for(j=1; j<=NQ; j++)
			newomi[j] = gy[j+Nr][i];
		gxi2(gomi, xi, newomi, nt);
		for(j=1; j<=nst; j++)
			gom[j][i] = gomi[j];
	}
	for(k=1; k<=NQ1; k++){
		// 3.1 update Lambda_omegak = (Pi_k, B_k, Gamma_k)
		for(j=1; j<=nst; j++){
			lmdomk[j] = lmdom[k][j];
			dlmdomk[j] = dlmdom[k][j];
		}
		for(i=1; i<=NN; i++)
			etak[i] = gy[Nr+k][i];
		genLMDomk0(lmdomk, etak, gom, dlmdomk, psz[k], nt);
		for(j=1; j<=nst; j++)
			lmdom[k][j] = lmdomk[j];

		if(penalize){
			// 3.2 update dlmdomk = diag(D_Lambdaomk)
			// The scale of the k-th structural equation is psz[k] (Psi_zeta);
			// psi[] holds the measurement-equation variances and is indexed by
			// observed variable, so it must not be used here.
			genDlmdomk0(dlmdomk, lmdomk, psz[k], lassolmd[k][1], lassolmd[k][2], lassolmd[k][3], nt);
			for(j=1; j<=nst; j++)	dlmdom[k][j] = dlmdomk[j];
		}

		// 3.3 update pszk = Psz[k][k]
		psz[k] = genpszk0(a0psz[k], b0psz[k], etak, gom, lmdomk, dlmdomk, nt);

		if(penalize){
			// 3.4 Update Lasso parameters lambda
			lassolmdk1(lassolmd[k], lassor0, lassodt0, dlmdomk, nt);
		}
	} // for k, theta2

	// (4). Update Phi from the xi rows of gy (rows Nr+NQ1+1 .. Nr+NQ1+NQ2)
	double **Omg2 = dmatrix(1, NQ2, 1, NN);
	for(i=1; i<=NQ2; i++)
		for(j=1; j<=NN; j++)
			Omg2[i][j] = gy[i+Nr+NQ1][j];
	genPhi(Phi, R0, r0, Omg2);
	free_dmatrix(Omg2, 1, NQ2, 1, NN);


	free_dvector(xi, 1, NX);
	free_dvector(yi, 1, NP);
	free_dvector(oldomi, 1, NQ);
	free_dvector(newomi, 1, NQ);
	free_dvector(ci, 1, Nr);
	free_dvector(lmdyk, 1, ny);
	free_dvector(yk, 1, NN);
	free_dmatrix(Am, 1, NP, 1, Nr);
	free_dmatrix(Lmd, 1, NP, 1, NQ);
	free_dvector(etak, 1, NN);
	free_dvector(lmdomk, 1, nst);
	free_dvector(dlmdomk, 1, nst);
	free_dvector(gomi, 1, nst);
	free_dmatrix(gom, 1, nst, 1, NN);

} //End of OneStepMCMCpara()



// omiMH() performs one Metropolis-Hastings update of omega_i, i=1, ..., n.
// newomi: output (length NQ); receives the proposal if it is accepted,
//         otherwise a copy of oldomi
// oldomi: current value of omega_i
// gm:     (Pi, B, Gamma), forwarded to the proposal omiprop()
// lmdom:  (Pi, B, Gamma), forwarded to the log-density pomegai()
// All remaining arguments are passed through to pomegai() and omiprop().
// Returns 1.0 when the proposal was accepted and 0.0 otherwise.
double omiMH(double *newomi, double *oldomi, double *yi, double *xi, double **Am, double *ci, double **gm, double s2omi, double **Lmd, double *Psi, double *Psd, double **Phi, double **lmdom, double **Deltah, int nt)
{
	double*	cand = dvector(1, NQ);

	// log-density at the current state, then propose and evaluate the candidate
	const double lpOld = pomegai(Am,  ci,  Lmd, oldomi, yi, xi, Psi, Phi, lmdom, Psd, nt);
	omiprop(cand, oldomi, s2omi, Lmd, Phi, gm, Psd, Psi, Deltah, nt);
	const double lpNew = pomegai(Am,  ci,  Lmd, cand, yi, xi, Psi, Phi, lmdom, Psd, nt);

	// log acceptance probability, capped at 0
	const double logRatio = min(0.0, lpNew-lpOld);
	const int taken = (log(s_xuni())<=logRatio);

	const double *chosen = taken ? cand : oldomi;
	for(int c=1; c<=NQ; c++)
		newomi[c] = chosen[c];

	free_dvector(cand, 1, NQ);
	return taken ? 1.0 : 0.0;
	
} //End of omiMH()


// pomegai() calculates log-probability log-p(omega_i|y_{io},...)
// th=(etai, xi, H(ksii));  gm=(Pi, B, Gamma);  fo = inv(Phi);  xi=x_i;
// Psi=diag(Psi_epsilon); Psd=diag(Psi_zeta);
double pomegai(double **Am, double *ci, double **Lmd,double *omi,double *yi,double *xi,double *Psi,double **Phi,double **gm,double *Psd, int nt)
{  
   int i,j, nst=NQ1+NX+nt;
   double *ksii,*th,**fo;
   double temp,tempp,tp;

   ksii=dvector(1,NQ2);  th=dvector(1,nst);   fo=dmatrix(1,NQ2,1,NQ2); 
   temp=0.0;
   for(i=1;i<=NP;i++){
      tp=yi[i];
	  for(j=1;j<=Nr;j++) tp-=Am[i][j]*ci[j];
      for(j=1;j<=NQ;j++) tp-=Lmd[i][j]*omi[j]; 
	  temp+=tp*tp/Psi[i];
   }
   for(i=1;i<=NQ2;i++)  ksii[i]=omi[NQ1+i]; 
   gxi2(th, xi, omi, nt);
   for(i=1;i<=NQ1;i++){
     tempp=omi[i]; 
     for(j=1;j<=nst;j++)  tempp-=gm[i][j]*th[j];
     temp+=tempp*tempp/Psd[i];   
   }  

   for(i=1;i<=NQ2;i++){
	   for(j=1;j<=NQ2;j++)  fo[i][j]=Phi[i][j];  
   } 
   invv(fo,NQ2); 
   for(i=1;i<=NQ2;i++){
     for(j=1;j<=NQ2;j++)  temp+=ksii[i]*ksii[j]*fo[i][j];
   }
	//if(testprint){
	//	printf("\n temp =%f:", temp); 
	//	cout << "\n ksii: "; dvectorprint(ksii, NQ2);
	//	cout << "\n th: "; dvectorprint(th, NST);
	//	//printf("\n Test! Enter an interger:");
	//	//scanf("%d", &test);
	//}
   free_dvector(ksii,1,NQ2);  free_dvector(th,1,nst);   free_dmatrix(fo,1,NQ2,1,NQ2);
   return (-0.5*temp);
} // End of pomegai()


// omiprop() proposes a new omega_i by adding a draw from multinor() to the
// previous omega_i.
// propomi: output (length NQ), propomi = oldomi + draw
// oldomi:  previous omega_i
// s2omi:   scalar applied to the inverted NQ*NQ matrix before drawing
// gm:      (Pi, B, Gamma)
// Deltah:  nt*NQ2 matrix used to linearise H(xi)
// Psd = diag(Psi_zeta); Psi = diag(Psi_epsilon); Lmd = Lambda; Phi = covariance of xi.
// The matrix that is inverted is assembled block-wise:
//   eta-eta : (I-Pi)' diag(1/Psd) (I-Pi)
//   eta-xi  : (I-Pi)' C, with C = -(Gamma*Deltah) row-scaled by 1/Psd
//   xi-xi   : C' diag(Psd) C + inv(Phi)
// and Lambda' diag(1/Psi) Lambda is added to the whole matrix.
// multinor(1, NQ, ...) is presumably one zero-mean normal draw with the given
// covariance — confirm against its definition.
void omiprop(double *propomi, double *oldomi, double s2omi, double **Lmd, double **Phi, double **gm, double *Psd, double *Psi, double **Deltah, int nt)
{
	int			a, b, s;
	double		acc;
	double**	prec = dmatrix(1, NQ, 1, NQ);
	double**	cross = dmatrix(1, NQ1, 1, NQ2);
	double**	phiInv = dmatrix(1, NQ2, 1, NQ2);
	double**	iMinusPi = dmatrix(1, NQ1, 1, NQ1);
	double**	draw = dmatrix(1, 1, 1, NQ);

	// I - Pi, taken from the first NQ1 columns of gm
	for(a=1; a<=NQ1; a++)
		for(b=1; b<=NQ1; b++)
			iMinusPi[a][b] = (a==b) ? 1-gm[a][a] : -gm[a][b];

	// C = -(Gamma*Deltah), each row divided by Psd
	for(a=1; a<=NQ1; a++){
		for(b=1; b<=NQ2; b++){
			acc = 0.0;
			for(s=1; s<=nt; s++)
				acc -= gm[a][s+NQ1+NX]*Deltah[s][b];
			cross[a][b] = acc/Psd[a];
		}
	}

	// inverse of Phi
	for(a=1; a<=NQ2; a++)
		for(b=1; b<=NQ2; b++)
			phiInv[a][b] = Phi[a][b];
	invv(phiInv, NQ2);

	// eta-eta block
	for(a=1; a<=NQ1; a++){
		for(b=1; b<=NQ1; b++){
			acc = 0.0;
			for(s=1; s<=NQ1; s++)
				acc += iMinusPi[s][a]*iMinusPi[s][b]/Psd[s];
			prec[a][b] = acc;
		}
	}

	// eta-xi block and its mirror image
	for(a=1; a<=NQ1; a++){
		for(b=1; b<=NQ2; b++){
			acc = 0.0;
			for(s=1; s<=NQ1; s++)
				acc += iMinusPi[s][a]*cross[s][b];
			prec[a][b+NQ1] = acc;
			prec[b+NQ1][a] = acc;
		}
	}

	// xi-xi block
	for(a=1; a<=NQ2; a++){
		for(b=1; b<=NQ2; b++){
			acc = 0.0;
			for(s=1; s<=NQ1; s++)
				acc += cross[s][a]*cross[s][b]*Psd[s];
			acc += phiInv[a][b];
			prec[a+NQ1][b+NQ1] = acc;
		}
	}

	// add the measurement-equation part Lambda' diag(1/Psi) Lambda
	for(a=1; a<=NQ; a++){
		for(b=1; b<=NQ; b++){
			acc = 0.0;
			for(s=1; s<=NP; s++)
				acc += Lmd[s][a]*Lmd[s][b]/Psi[s];
			prec[a][b] += acc;
		}
	}

	// invert, scale by s2omi and draw the increment
	invv(prec, NQ);
	for(a=1; a<=NQ; a++)
		for(b=1; b<=NQ; b++)
			prec[a][b] = prec[a][b]*s2omi;
	multinor(1, NQ, prec, draw);
	for(a=1; a<=NQ; a++)
		propomi[a] = draw[1][a]+oldomi[a];

	free_dmatrix(cross, 1, NQ1, 1, NQ2);
	free_dmatrix(phiInv, 1, NQ2, 1, NQ2);
	free_dmatrix(prec, 1, NQ, 1, NQ);
	free_dmatrix(iMinusPi, 1, NQ1, 1, NQ1);
	free_dmatrix(draw, 1, 1, 1, NQ);

} //End of omiprop()

// Hksii() builds H(xi_i) = (intercept, xi_1, ..., xi_NQ2, xi_1*xi_2).
// ksii: 1-based vector holding xi_i
// nt:   length of the returned vector; entries 1 .. NQ2+2 are written, so
//       nt is expected to be at least NQ2+2 (any further entries are left unset)
// Returns a newly allocated vector (dvector(1, nt)); the caller releases it.
double* Hksii(double *ksii, int nt)
{
	double* hvec=dvector(1, nt);
	int pos = 1;

	hvec[pos++] = 1.0;				// intercept
	for(int q=1; q<=NQ2; q++)
		hvec[pos++] = ksii[q];		// linear terms
	hvec[pos] = ksii[1]*ksii[2];	// interaction term at index NQ2+2
	return hvec;
} //end Hksii();



// Calculate G(x_i, w_i)=(eta_i, x_i, H(xi_i))
// Notations: omi = (eta_i, xi_i); xi = x_i; 
// NOTE: gxi2() is only for q2=2 & H(xi_i) = (xi_1, xi_2, xi_1*xi_2);
void gxi2(double *gx2, double *xi, double *omi, int nt)
{
	int i;
	double*	ksii=dvector(1, NQ2);
	double*	hi;

	for(i=1; i<=NQ1; i++)	gx2[i]=omi[i];
	for(i=1; i<=NX; i++)	gx2[i+NQ1]=xi[i];
	for(i=1; i<=NQ2; i++)	ksii[i]=omi[i+NQ1];
	hi=Hksii(ksii, nt);
	for(i=1; i<=nt; i++)	gx2[i+NQ1+NX]=hi[i];
	free_dvector(ksii, 1, NQ2);
	free_dvector(hi, 1, nt);
} //end gxi2();


// genbeta0k() generates the constant term beta0k of the k-th structural
// equation, k=1, ..., q1, as mean + gasdev()*sd with
//   mean = average over the NN samples of etak[i] - sum_j lmdomk0[j]*gom0[j][i]
//   sd   = sqrt(pszk/NN).
// pszk:    Psi_{zeta k}
// etak:    NN vector with the k-th eta
// lmdomk0: coefficient vector without the constant term (length NQ1+NX+nt-1)
// gom0:    matching (NQ1+NX+nt-1)*NN regressor matrix
double genbeta0k(double pszk, double *etak, double *lmdomk0, double **gom0, int nt)
{
	const int ncoef = NQ1+NX+nt-1;
	double residSum = 0.0;

	for(int obs=1; obs<=NN; obs++){
		double fitted = 0.0;
		for(int c=1; c<=ncoef; c++)
			fitted += lmdomk0[c]*gom0[c][obs];
		residSum += (etak[obs]-fitted); 
	}

	const double center = residSum/(1.0*NN);
	const double spread = sqrt(pszk/(1.0*NN));
	return center+gasdev()*spread;

} // End genbeta0k()


// Modified version for parametric nonlinear SE.
// genLMDomk0() generates Lambda_omegak, k=1, ..., q1.
// This version does not penalize the constant term in the SE.
// lmdomk:  in/out, Lambda_omk = (Pi_k, B_k, Gamma_k), a (q1+s+t)*1 vector; on return
//          entries 1..NQ1 are 0, entry NQ1+NX+1 is the new constant beta0k and
//          all other entries are the new draws
// omk:     n*1 vector, k=1, ..., q1
// go:      (Eta, X, H) in SE, a (q1+s+t)*n matrix
// dlmdomk: diagonal elements of D_lambdaomk (entries NQ1+1.. are inverted here,
//          except the constant's entry, which is skipped)
// pszk:    Psi_{zeta k}
// nt:      number of terms in H (t)
// Steps: drop the constant's row from go, draw beta0k with genbeta0k(), then draw
// the B and Gamma coefficients as mean + multinor() draw, where the matrix passed
// to multinor() is pszk * inv(G G' + D^{-1}) restricted to the (B, Gamma) block and
// the mean is inv(G G' + D^{-1}) G (omk - beta0k).
// zerodmatrix() is presumably a zero-filled 1-based dmatrix — confirm against its definition.
void genLMDomk0(double *lmdomk, double *omk, double **go, double *dlmdomk, double pszk, int nt)
{
	int			i, j, nom=NX+nt, nst=NQ1+NX+nt;
	double		beta0k;
	double*		lmdomk0 = dvector(1, nst-1);
	double*		dlmdomk0 = dvector(1, nst-1);
	double**	go0 = dmatrix(1, nst-1, 1, NN);
	double**	go0T = dmatrix(1, NN, 1, nst-1);
	double**	invSlmdomk0 = dmatrix(1, nst-1, 1, nst-1);
	double**	temp = dmatrix(1, nst-1, 1, NN);
	double*		mulmdomk0 = dvector(1, nst-1);
	double**	Slmdomk0 = zerodmatrix(nst-1, nst-1);

	// Copy everything except the constant term (index NQ1+NX+1) into the
	// "0" arrays; entries after the constant shift down by one.
	for(j=1; j<=nst; j++){
		if(j<=NQ1+NX){
			lmdomk0[j] = lmdomk[j];
			dlmdomk0[j] = dlmdomk[j];
			for(i=1; i<=NN; i++)
				go0[j][i] = go[j][i];
		}else if(j>NQ1+NX+1){
			lmdomk0[j-1] = lmdomk[j];
			dlmdomk0[j-1] = dlmdomk[j];
			for(i=1; i<=NN; i++)
				go0[j-1][i] = go[j][i];
		}
	}
	// invSlmdomk0 = go0 * go0'
	dmatrixtranspose(go0, nst-1, NN, go0T);
	dmatrixmultiply(go0, nst-1, NN, go0T, NN, nst-1, invSlmdomk0);
	beta0k = genbeta0k(pszk, omk, lmdomk0, go0, nt);

	lmdomk[NQ1+NX+1] = beta0k;

	// Restrict to the (B, Gamma) block, add D^{-1} on the diagonal and invert
	double**	Slmdomk1 = dmatrix(1, nom-1, 1, nom-1);
	for(i=1; i<=nom-1; i++)
		for(j=1; j<=nom-1; j++)
			Slmdomk1[i][j] = invSlmdomk0[i+NQ1][j+NQ1] + (1.0/dlmdomk0[i+NQ1])*(i==j);
	invv(Slmdomk1, nom-1);

	// Embed the inverted block into the zero matrix (rows/columns 1..NQ1 stay
	// zero) and form the mean from the centred responses
	for(i=1; i<=nom-1; i++)
		for(j=1; j<=nom-1; j++)
			Slmdomk0[i+NQ1][j+NQ1] = Slmdomk1[i][j];
	dmatrixmultiply(Slmdomk0, nst-1, nst-1, go0, nst-1, NN, temp);
	for(i=1; i<=nst-1; i++){
		mulmdomk0[i] = 0.0;
		for(j=1; j<=NN; j++)
			mulmdomk0[i] += temp[i][j]*(omk[j]-beta0k);
	}
	// Scale by pszk before drawing. (A separate copy of the mean into a
	// "mulmdomk1" buffer used to be made here but was never read.)
	for(i=1; i<=nom-1; i++){
		for(j=1; j<=nom-1; j++)
			Slmdomk1[i][j] = Slmdomk1[i][j]*pszk;
	}
	double**	lmdomk01 = dmatrix(1, 1, 1, nom-1);
	multinor(1, nom-1, Slmdomk1, lmdomk01);
	// Write back: Pi_k = 0, B_k and Gamma_k = draw + mean; the constant
	// (index NQ1+NX+1) keeps the beta0k stored above.
	for(i=1; i<=nst; i++){
		if(i<=NQ1)	lmdomk[i] = 0.0;
		else if(i<=NQ1+NX)	
			lmdomk[i] = lmdomk01[1][i-NQ1]+mulmdomk0[i];
		else if(i>NQ1+NX+1)
			lmdomk[i] = lmdomk01[1][i-NQ1-1]+mulmdomk0[i-1];
	}

	free_dmatrix(Slmdomk1, 1, nom-1, 1, nom-1);
	free_dmatrix(lmdomk01, 1, 1, 1, nom-1);
	free_dvector(mulmdomk0, 1, nst-1);  free_dmatrix(go0, 1, nst-1, 1, NN); free_dmatrix(go0T, 1, NN, 1, nst-1);
	free_dmatrix(Slmdomk0, 1, nst-1, 1, nst-1);  free_dmatrix(invSlmdomk0, 1, nst-1, 1, nst-1);
	free_dmatrix(temp, 1, nst-1, 1, NN);	 free_dvector(lmdomk0, 1, nst-1);	free_dvector(dlmdomk0, 1, nst-1);

} //End genLMDomk0()


// genDlmdomk0() generate (tau2_Pik, tau2_Bk, tau2_Gammak) from Inv-Gaussian
// lpk = lambda_Pik; lbk = lambda_Bk; lgk = lambda_Gammak
void genDlmdomk0(double *dlmdomk, double *lmdomk, double pszk, double lpk, double lbk, double lgk, int nt)
{
	int		i, j;
	double	mu2, lmd2, mu3, lmd3;

	for(j=1; j<=NQ1; j++)
		dlmdomk[j] = 0.0;
	
	for(j=1; j<=NX; j++){
		mu2 = sqrt((lbk/lmdomk[NQ1+j])*(lbk/lmdomk[NQ1+j])*pszk);
		lmd2 = lbk*lbk;
		dlmdomk[NQ1+j] = 1.0/(inv_gaussian(mu2, lmd2));
		//printf("\n mu2=%f, lmd2=%f, dlmdomk[%d]=%f", mu2, lmd2, NQ1+j, dlmdomk[NQ1+j]);
	}

	for(j=2; j<=nt; j++){
		mu3 = sqrt((lgk/lmdomk[NQ1+NX+j])*(lgk/lmdomk[NQ1+NX+j])*pszk);
		lmd3 = lgk*lgk;
		dlmdomk[NQ1+NX+j] = 1.0/(inv_gaussian(mu3, lmd3));
		//printf("\n mu3=%f, lmd3=%f, dlmdomk[%d]=%f", mu3, lmd3, NQ1+NX+j, dlmdomk[NQ1+NX+j]);
	}
	dlmdomk[NQ1+NX+1] = 0.0;
} //End of genDlmdomk0()


// genpszk0() generates psz_k as the reciprocal of an rgamma(a, b) draw with
//   a = a0 + (NN+NX+nt)/2
//   b = b0 + (RSS + PEN)/2
// where RSS = sum_i (omk[i] - sum_j go[j][i]*lmdomk[j])^2 and
// PEN = sum of lmdomk[j]^2/dlmdomk[j] over j = NQ1+1 .. NQ1+NX+nt, skipping the
// constant term at j = NQ1+NX+1.
// NOTE(review): PEN has NX+nt-1 terms while the shape a counts NX+nt — confirm
// this is intended.
// a0, b0:  hyperparameters
// omk:     NN vector of the k-th eta
// go:      (NQ1+NX+nt)*NN regressor matrix
// lmdomk:  coefficient vector; dlmdomk: diagonal of D_lambdaomk
double genpszk0(double a0, double b0, double *omk, double **go, double *lmdomk, double *dlmdomk, int nt)
{
	const int	nst = NQ1+NX+nt;
	const int	constPos = NQ1+NX+1;
	const double	shape = a0 + (NN+NX+nt)/2.0;
	double	rss = 0.0, pen = 0.0;

	// residual sum of squares of the structural equation
	for(int obs=1; obs<=NN; obs++){
		double resid = omk[obs];
		for(int c=1; c<=nst; c++)
			resid -= go[c][obs]*lmdomk[c];
		rss += resid*resid;
	}

	// penalty term from the coefficient prior, constant term excluded
	for(int c=NQ1+1; c<=nst; c++){
		if(c==constPos)
			continue;
		pen += lmdomk[c]*(1.0/dlmdomk[c])*lmdomk[c];
	}

	const double	rate = b0 + (rss+pen)/2.0;
	return 1.0/(rgamma(shape, rate));
} //End of genpszk0


// Update Lasso parameters (lambda_Pik, lambda_Bk, lambda_Gammak).
// lambda for beta0k (the constant term) is 0, so its tau is left out of the sums.
// lmd:     output, lmd = (lpk, lbk, lgk); lmd[1] is set to 0, lmd[2] and lmd[3]
//          are the square roots of rgamma() draws
// r0, dt0: 3*1 vectors of given hyperparameters (entries 2 and 3 are used)
// dlmdomk: diagonal of D_lambdaomk, length NQ1+NX+nt
// nt:      number of terms in H; the constant term (index NQ1+NX+1) is excluded,
//          hence the "-1" in the shape for lambda_Gammak
void lassolmdk1(double *lmd, double *r0, double *dt0, double *dlmdomk, int nt)
{
	int			j;
	double		sumtauB = 0.0, sumtauG = 0.0;

	for(j=1; j<=NX; j++)
		sumtauB += dlmdomk[j+NQ1];
	// Bound by the nt argument, not the global NT: dlmdomk only has
	// NQ1+NX+nt entries, so running to NT could read past its end.
	for(j=2; j<=nt; j++)
		sumtauG += dlmdomk[j+NQ1+NX];

	lmd[1] = 0.0;
	lmd[2] = rgamma(1.0*NX+r0[2], sumtauB/2+dt0[2]);
	lmd[2] = sqrt(lmd[2]);
	lmd[3] = rgamma(1.0*nt+r0[3]-1, sumtauG/2+dt0[3]);
	lmd[3] = sqrt(lmd[3]);
} //End of lassolmdk1()



// init() provides starting values of w_i given (Pi, B, Gamma, Phi, PSD).
// Output: om = (om1, ..., omn), an NQ*NN matrix with omi = (eta_i, xi_i) = w_i
// Input:  Pi (NQ1*NQ1), B (NQ1*NX), Gamma (NQ1*nt), PHI (NQ2*NQ2),
//         PSD (NQ1 vector of Psi_zeta), X (NX*NN), nt = number of terms in H(xi_i)
// Steps:
//   1. draw xi_i, i = 1..NN, with multinor(NN, NQ2, PHI, ...) (presumably N(0, PHI);
//      confirm against multinor's definition)
//   2. for each i form B x_i + Gamma H(xi_i) + gasdev()*sqrt(PSD)
//   3. eta_i = inv(I - Pi) times the vector from step 2
void init(double** Pi, double **B,double **Gamma, double **PHI,double *PSD,double **X,double **om, int nt)  
{
   int    i, j, k;
   double	**INPH1, **ksii, *xxi, **invPi0;
   double*	omi = dvector(1, NQ);
   double*	hi;
            
   ksii=dmatrix(1,NN,1,NQ2);      xxi=dvector(1,NX);
   INPH1=dmatrix(1,NQ2,1,NQ2);	  invPi0=dmatrix(1, NQ1, 1, NQ1);
   // work on a copy of PHI so the caller's matrix is not handed to multinor()
   for(i=1;i<=NQ2;i++){
	   for(j=1;j<=NQ2;j++) INPH1[i][j]=PHI[i][j]; 
   }
   multinor(NN, NQ2, INPH1, ksii);

   // invPi0 = inv(I - Pi)
   for(i=1; i<=NQ1; i++)
	   for(j=1; j<=NQ1; j++){
		   if(i==j) invPi0[i][i] = 1-Pi[i][i];
		   else invPi0[i][j] = -Pi[i][j];
	   }
	invv(invPi0, NQ1);

   for(i=1; i<=NN; i++){       
       for(j=1; j<=NQ2; j++) omi[j+NQ1]=ksii[i][j];
	   for(j=1; j<=NX; j++) xxi[j]=X[j][i];
	   hi = Hksii(ksii[i], nt);

       for(j=1; j<=NQ1; j++){
		   omi[j]=gasdev()*sqrt(PSD[j]);
		   for(k=1; k<=NX; k++){
			   omi[j]+=B[j][k]*xxi[k];
		   }
		   // hi has exactly nt entries, so the loop is bounded by the nt
		   // argument rather than the global NT
		   for(k=1; k<=nt; k++) omi[j]+=Gamma[j][k]*hi[k];
	   }
	   free_dvector(hi, 1, nt);
	    
	   // eta_i = inv(I - Pi) * (B x_i + Gamma H(xi_i) + zeta_i)
	   for(j=1; j<=NQ1; j++){
		   om[j][i] = 0.0;
		   for(k=1; k<=NQ1; k++){
				om[j][i] += invPi0[j][k]*omi[k];
		   }
	   }
	   for(j=1; j<=NQ2; j++) om[j+NQ1][i]=ksii[i][j];
    }
   free_dmatrix(INPH1,1,NQ2,1,NQ2); free_dmatrix(invPi0, 1, NQ1, 1, NQ1);
   free_dmatrix(ksii,1,NN,1,NQ2);  free_dvector(omi, 1, NQ);	free_dvector(xxi,1,NX);   
} //end init()