##############################################################################
# These are the functions need for fitting stratified proportional odds
# models by amalgamating conditional likelihoods
#
# Written on 30/03/2009
# Revised on 20/08/2009
# Revised on 27/08/2009
# Based on the work of Bhramar Mukerjee and Jaeil Ahn at 07.05.07
#        item 23 on http://www.sph.umich.edu/bhramar/public_html/research/
##############################################################################

##############################################################################
# function to generate a set of sample data for illustration or test purpose.
sample.data=function(nstrata=50,nordinal=5,cov.type=NULL,nobservation=3)
  {
    # definition of parameters
    # nstrata:       	number of strata
    # nobservation:     number of observation in each stratum, say 4
    #				or list of the numbers, say 3:5
    # nordinal:      	cadinality of responses
    # cov.type:      	type of covariates
    #                	1:= (0,1)
    #                	2:= (0,1,2,3,4)
    #                	3:= positive real
    #                	4:= real

    if(is.null(cov.type)){cov.type=rep(1,4)};
    nparm=length(cov.type);     # nparm:  number of covariates

    # number of observations for each stratum
    n.eachs=vector("numeric",nstrata);
    if(length(nobservation)==1) 
	{
 	  n.eachs=rep(nobservation,nstrata);
 	} else
	{
        # if let n.eachs unequal
	  n.eachs=sample(nobservation,nstrata,replace=TRUE);  
      }

    n=sum(n.eachs);

    # sample strata
    strata=sample(rep(1:nstrata,n.eachs[1:nstrata]),n,replace=FALSE);

    # sample response
    y=sample(1:nordinal,n,replace=TRUE);

    # sample covariates
    x=vector("list",nparm);
    for(i in 1:nparm)
      {
        if(cov.type[i]==1){x[[i]]=sample(c(0,1),n,replace=TRUE)};
        if(cov.type[i]==2){x[[i]]=sample(0:4,n,replace=TRUE)};
        #x[[i]]=rexp(n,0.1) # positive real 
        if(cov.type[i]==3){x[[i]]=rexp(n,0.1)}; # positive real 
        if(cov.type[i]==4){x[[i]]=rnorm(n,0.1)}; # real 
      };

    rdata=data.frame(strata,y,matrix(unlist(x),nrow=n,ncol=nparm,byrow=FALSE));
    names(rdata)= c("strata","y",paste("x",1:nparm,sep=""));
    return(rdata);
  }
# end of function sample.data
# example:
# rdata=sample.data(nstrata=200,nordinal=5,cov.type=c(rep(1,2),2,3,2,3,4,4),nobservation=6:8)
##############################################################################


##############################################################################
# function to extend multi-level covariate to binary covariates
# definition of parameters
# data:          a data frame with header (strata,y,x_1,...,x_p)
# pos.mult.cov:  a vector indicater the position of multi-level covariates
#                pos.mult.cov=c(3,5) means x_3,x_5 are multi-level
# method:        sumtozero means the dummy variable sum to zero, for example
#                x  x.1  x.2  x.3    
#                1    1    0    0
#                2    0    1    0
#                3    0    0    1
#                4   -1   -1   -1
#                reference means the set the highest level as reference, for example
#                x  x.1  x.2  x.3    
#                1    1    0    0
#                2    0    1    0
#                3    0    0    1
#                4    0    0    0

 
ext.data=function(data,pos.mult.cov,method="sumtozero")
  {

    pos.mult.cov=sort(pos.mult.cov);
    ext.num=NULL;

    nparm=ncol(data)-2;                          #total number of covariates
    for(i in 1:length(pos.mult.cov))
      {
        pos=pos.mult.cov[i]+2;                   #the position of covariates to be extended in data
        ori.covariate=data[,pos];                #original covariate to be extended
        ext.level=sort(unique(ori.covariate));   #the levels in the multi-level covariate
        ext.num[i]=length(ext.level);            #the number of levels

        #initial the extended covariates
        ext.covariates=matrix(0,ncol=ext.num[i]-1,nrow=length(ori.covariate));

        for(j in 1:length(ori.covariate))
          {
            temp.col.num= match(ori.covariate,ext.level)[j];
            if(method=="reference")
		{	
			if(temp.col.num!=length(ext.level)){ext.covariates[j,temp.col.num]=1}
		}; #set highest level as reference. 
            if(method=="sumtozero") 
		{
			if(temp.col.num!=length(ext.level)){ext.covariates[j,temp.col.num]=1}
			else {ext.covariates[j,]=-1}
		}; #sumtozero. 
          
          };

        temp.name=names(data);
        new.data=data.frame(data[,1:(pos-1)],ext.covariates);
        if(pos+1<=length(temp.name)){new.data=data.frame(new.data,data[,(pos+1):length(temp.name)])};
        data=new.data;
        
        new.name=c(temp.name[1:(pos-1)],paste(temp.name[pos],".",1:(ext.num[i]-1), sep=""));
        if(pos+1<=length(temp.name)){new.name=c(new.name, temp.name[(pos+1):length(temp.name)])};
        names(data)=new.name;

        pos.mult.cov[(i+1):length(pos.mult.cov)]= pos.mult.cov[(i+1):length(pos.mult.cov)]+ext.num[i]-2;
      };
    return(data);
  }
# end of function ext.data
# example:
# sdata=ext.data(rdata,c(3,5)) where x3 and x5 are multilevel
##############################################################################


##############################################################################
# main fiting function
# Method="clogit","spg","optim"
fitting.ACLR=function(rdata,method="spg")
  {
    # Data structure (strata,y,x_1,...,x_p)
    # y should be (0,1,2,...,nordinal)

    # Make sure the data is data.frame    
    rdata=as.data.frame(rdata);

    # Extract the basic variables from the data
    # Strata
    strata=rdata[,1];
    strname=unique(strata);     #the name of strata
    nstrata=length(strname);    #total number of strata

    # Responses
    rdata[,2]=rdata[,2]-min(rdata[,2]);                 #shift response to (0,1,2,...,nordinal
    y=rdata[,2];
    resname=sort(unique(y));          #the name of responses
    nordinal<-max(y); # define the ordinali of K
#    nordinal<-length(unique(rdata[,2]))-1; # define the ordinali of K

    # Covariates
    nparm=ncol(rdata)-2;        # number of covariates

    # Data in each stratum and N_i
    str.data=vector("list",nstrata);
    # n.eachs means N_i
    n.eachs=vector(mode="numeric",nstrata);
    for(i in 1:nstrata)
      {
        str.data[[i]]=rdata[rdata[,1]==strname[i],];
        n.eachs[i]=dim(str.data[[i]])[1];
      };

    # m.ri: M_ri, number of observations in stratum i having Y_ij>=r
    # s.ri: S_ri, nparm*1 vectors
    #       the sum of covariate vectors over M_ri observations
    #       with Y_ij>=r
    # q.ri: q_ri, nparm*1 vectors
    #       the sum of the observed covariate vectors over all possible
    #       M_ri observations, i.e. permutation of M_ri in N_i observations
    #       in stratum i
    # n.ri: total number of q_ri in i-th stratum for r=r. 

    # obtain the maximum column in i-r th element 
    nmaxcom=choose(max(n.eachs),as.integer(max(n.eachs)/2));
    
    # package for combinations function
    if(is.na(match("gtools",installed.packages()[,1]))) { install.packages("gtools")};
    library(gtools);

    # initial M_ri,S_ri,q_ri,n.ri
    m.ri=array(0,c(nstrata,nordinal));
    s.ri=array(0,c(nstrata,nordinal,nparm));
    q.ri=array(0,c(nstrata, nordinal, nparm, nmaxcom));
    n.ri=array(0,c(nstrata,nordinal));

    for(i in 1:nstrata)
      {
        y.ij=str.data[[i]][,2]
        for(r in 1:nordinal)
          {
            m.ri[i,r] =sum(y.ij>=r);
            s.ri[i,r,]=apply(str.data[[i]][y.ij>=r,3:(nparm+2)],2,sum);
  	    	if(m.ri[i,r]>0)
              {  q.ri.idx=combinations(n.eachs[i],m.ri[i,r],1:n.eachs[i])
                 n.ri[i,r]=dim(q.ri.idx)[1]
                 for(q in 1:n.ri[i,r])
		       {
		         q.ri[i,r,,q]=apply(str.data[[i]][q.ri.idx[q,],3:(nparm+2)],2,sum);
		       }
              }
          }
      };

    # log-likelihood function 
    log.likelihood=function(beta)
      {
        l.ir=array(0,c(nstrata,nordinal));
        for(i in 1:nstrata)
          {
            for(r in 1:nordinal)
              {
		    l.ir[i,r]=beta%*%s.ri[i,r,]-log(sum(exp(beta%*%q.ri[i,r,,1:n.ri[i,r]])));
	        }
          }
        return(sum(l.ir));
      };

    # pseudo-score function
    score=function(beta)
      {
	  u.ri=array(0,c(nstrata, nordinal, nparm));
        for(i in 1:nstrata)
          {
            for(r in 1:nordinal)
              {
		    if(n.ri[i,r]>0)
			{
		    	  numerator=rep(0,nparm);
	 	    	  for(q in 1:n.ri[i,r])
			    {
			  	numerator=numerator+q.ri[i,r,,q]*exp(beta%*%q.ri[i,r,,q]);
			    }
		    	  u.ri[i,r,]=s.ri[i,r,]-numerator/sum(exp(beta%*%q.ri[i,r,,1:n.ri[i,r]]));
			}
 	        }
          }
	  return(list(score=apply(u.ri,3,sum),u.ri=u.ri));
	}

    #information matrix
    info=function(beta)
	{
	  d.ri=array(0, c(nstrata, nordinal, nparm, nparm));
        for(i in 1:nstrata)
          {
            for(r in 1:nordinal)
              {
		    if(n.ri[i,r]>0)
			{
		    	  nume.1=array(0,c(nparm,nparm)); #\sum_{q_i\in\Omega_{ri}}q_{is}q_{it}exp(\beta^Tq_i) 
		    	  nume.2=rep(0,nparm); #\sum_{q_i\in\Omega_{ri}}q_{is}exp(\beta^Tq_i)
	 	    	  for(q in 1:n.ri[i,r])
			    {
			  	nume.1=nume.1+q.ri[i,r,,q]%*%t(q.ri[i,r,,q])*as.numeric(exp(beta%*%q.ri[i,r,,q]));
			  	nume.2=nume.2+q.ri[i,r,,q]*exp(beta%*%q.ri[i,r,,q]);
			    }
                    denominator=sum(exp(beta%*%q.ri[i,r,,1:n.ri[i,r]]));#\sum_{q_i\in\Omega_{ri}}exp(\beta^Tq_i) 
		    	  d.ri[i,r,,]=nume.1/denominator-nume.2%*%t(nume.2)/denominator^2;
			}
 	        }
          }
	  return(list(info=apply(d.ri,c(3,4),sum),d.ri=d.ri));
	}

   
    # get mle's of beta 
    if(method=="clogit")
      {
        # clogit approach
        
        if(is.na(match("survival",installed.packages()[,1]))) { install.packages("survival") };
        library(survival);

        # extend each stratum with multiple responses into K(nordinal) strata with binary responses
        extended.data=NULL
        for(i in 1:nrow(rdata))
          {
            y=match(rdata[i,2],resname)-1; #recode y into 0,1,...,K-1

  	      #replicate each line to nordinal lines
            tempdata=matrix(rdata[i,],ncol=ncol(rdata),nrow=nordinal,byrow=TRUE);
    
            #recode y 
            tempdata[,2]=c(rep(1,y),rep(0,nordinal-y));

            #recold i-r th stratum to i+r*nstrata stratum
            tempdata[,1]=unlist(tempdata[,1])+(1:nordinal)*nstrata;
 
            #rbind data
            extended.data=rbind(extended.data,tempdata);
          }

        # Convert extended.data to data.frame.
        extended.data=as.data.frame(matrix(unlist(extended.data),ncol=ncol(rdata),byrow=FALSE));
        names(extended.data)=names(rdata);

        # prepare formula for conditional logistic regression
        fmla=as.formula(paste(names(extended.data)[2]," ~ ", paste(names(extended.data[3:(nparm+2)]), collapse= "+"),"+strata(",names(extended.data[1]),")"));

        # implement conditional logistic regression
        clogit.out=clogit(fmla,data=extended.data);

        # estimate betas
        beta=clogit.out[[1]];

        # Information Matrix
        information=info(beta)$info;

        # Variance-Covariance Matrix: cov_mat<-qr.solve(info_ri);
        cov.mat=clogit.out[[2]];
      } 
    
    if(method=="spg")
      {
        # directly optimize loglikihood function, use score function as gradient and spg as optimize function.
        gr=function(vec){score(vec)$score};
        
        if(is.na(match("BB",installed.packages()[,1]))) {install.packages("BB")};
        library(BB);
        
        ans.spg=spg(par=c(rep(0, nparm)),fn=log.likelihood,gr=gr,control=list(maximize=TRUE));
    
        # estimate betas
        beta=ans.spg$par;

        # Information Matrix
        information=info(beta)$info;

        # Variance-Covariance Matrix: cov_mat<-qr.solve(info_ri)
        cov.mat<-qr.solve(information);
      }

    if(method=="optim")
      {
        # directly optimize loglikihood function, use score function as gradient and "optim" as optimize function.
        fnn=function(beta){-1*log.likelihood(beta)};
        grr=function(beta){-1*score(beta)$score};

        init=optim(c(rep(0, nparm)),method="BFGS",fn=fnn, gr=grr, hessian=TRUE);
          
        # Variance-Covariance Matrix:equal to hessian matrix
        cov.mat=init$hessian;
   
        # estimate betas
        beta=init$par;       

        # Information Matrix
        information=info(beta)$info;
      }

    # log-likelihood with estimated beta
    loglikelihood.beta=log.likelihood(beta);

    # score with estimate beta
    score.beta=score(beta)$score;

    # se
    se=sqrt(diag(cov.mat));

    # Sandwich Covariance Matrix
    u.ri=score(beta)$u.ri;
    u.tu=array(0,c(nparm,nparm));
    temp.u=apply(u.ri,c(1,3),sum);
    for(i in 1:nstrata)
      {
        u.tu=u.tu+temp.u[i,]%*%t(temp.u[i,]);
      }
    sand.cov.mat=cov.mat%*%u.tu%*%cov.mat;

    return(list(beta=beta,loglikelihood=loglikelihood.beta,score=score.beta,info=information,cov.mat=cov.mat,se=se,sand.cov=sand.cov.mat,sand.se=sqrt(diag(sand.cov.mat))));
  }
# end of function fitting.ACLR
# example:
# begtime=Sys.time()
# fit.out=fitting.ACLR(sdata)
# (runtime1=Sys.time()-begtime)
##############################################################################