#
#  rmutil : A Library of Special Functions for Repeated Measurements
#  Copyright (C) 1998 J.K. Lindsey
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  SYNOPSIS
#
#     read.list(file="", skip=0, nlines=2, order=NULL)
#     read.surv(file="", skip=0, nlines=1, cumulative=T, all=T)
#     restovec(response, times=NULL, totals=NULL, nest=NULL,
#	censor=NULL, delta=NULL)
#     tcctomat(ccov, names=NULL, oldtccov=NULL)
#     rmna(response, tvcov=NULL, ccov=NULL)
#
#  DESCRIPTION
#
#    Utility functions for reading repeated measurements data and
# converting them to R objects

read.list <- function(file="", skip=0, nlines=2, order=NULL){
	if(!is.null(order)){
		if(length(order)!=nlines)
			stop("order must have length",nlines,"\n")
		else if(range(order)!=c(1,nlines))
			stop("order must have values in",1:nlines,"\n")}
	continue <- T
	result <- list()
	while(continue){
		x <- scan(file,skip=skip,nlines=nlines,quiet=T)
		skip <- skip+nlines
		if(length(x)==0)continue <- F
		else {
			tmp <- matrix(x,ncol=nlines)
			if(!is.null(order))tmp <- tmp[,order]
			result <- c(result,list(tmp))}}
	invisible(result)}

read.surv <- function(file="", skip=0, nlines=1, cumulative=T, all=T){
	continue <- T
	result <- list()
	censor <- NULL
	while(continue){
		x <- scan(file,skip=skip,nlines=nlines,quiet=T)
		skip <- skip+nlines
		if(length(x)==0)continue <- F
		else {
			if(all)mm <- matrix(x,ncol=2,byrow=T)[,1]
			else mm <- x[1:(length(x)-1)]
			if(cumulative)mm <- c(mm[1],diff(mm))
			result <- c(result,list(mm))
			censor <- c(censor,x[length(x)])}}
	invisible(list(result,censor))}

restovec <- function(response, times=NULL, totals=NULL, nest=NULL,
	censor=NULL, delta=NULL){
	if(missing(response))stop("A response must be supplied")
	nind <- 0
	tnest <- nobs <- y <- NULL
	if(is.matrix(response)||is.data.frame(response)){
		if(is.data.frame(response))response <- as.matrix(response)
		if(is.data.frame(totals))totals <- as.matrix(totals)
		if(is.vector(censor,mode="double")||is.vector(censor,mode="integer")){
			if(length(censor)!=nrow(response))
				stop("Censoring vector must be the same length as the number of individuals")
			else {
				tmp <- matrix(1,nrow=nrow(response),ncol=ncol(response))
				tmp[,ncol(tmp)] <- censor
				censor <- tmp}}
		if(is.matrix(censor)){
			if(nrow(censor)!=nrow(response)||ncol(censor)!=ncol(response))
				stop("Censoring matrix must have the same dimensions as response")
			else censor <- as.vector(t(censor))}
		if(is.matrix(totals)){
			if(nrow(totals)!=nrow(response)||ncol(totals)!=ncol(response))
				stop("totals matrix must have the same dimensions as response")
			else totals <- as.vector(t(totals))}
		else if(!is.null(totals)&&(is.vector(totals,mode="double")||is.vector(totals,mode="integer"))){
		     if(length(totals)!=nrow(response))stop("totals vector must have same length as number of individuals")
		     else totals <- rep(totals,rep(ncol(response),nrow(response)))}
		if(is.null(times)){
			if(is.null(censor))times <- as.double(rep(1:ncol(response),nrow(response)))}
		else if(is.vector(times,mode="double")) {
			if(is.null(nest)&&any(diff(times)<=0,na.rm=T))stop("Times must be strictly increasing")
			if(length(times)!=ncol(response))stop("Number of times must equal number of response columns")
			times <- rep(times,nrow(response))}
		else if(is.matrix(times)){
			if(ncol(times)!=ncol(response)|nrow(times)!=nrow(response))stop("Matrix of times must be the same size as matrix of responses")
			for(i in 1:nrow(response))
				if(any(diff(times[i,])<=0,na.rm=T))
				stop(paste("Negative time step for individual ",i))
			times <- as.vector(t(times))}
		nobs <- rep(ncol(response),nrow(response))
		if(!is.null(nest)){
			if(is.vector(nest,mode="double")||is.vector(nest,mode="integer")){
				if(length(nest)!=ncol(response))
					stop("Length of nest vector not equal to number of observations per individual")
				else if(any(diff(nest)!=0&diff(nest)!=1,na.rm=T))
					stop("Nest categories must be consecutive increasing integers")
				else tnest <- rep(nest,nrow(response))}
			else if(is.matrix(nest)){
				if(any(dim(nest)!=dim(response)))
					stop("Dimensions of nest not the same as response")
				for(i in nrow(nest))if(any(diff(nest[i,])!=0&diff(nest[i,])!=1,na.rm=T))
					stop("Nest categories must be consecutive increasing integers")
				tnest <- as.vector(t(nest))}
			else stop("nest must be a vector or matrix")}
		if(!is.null(delta)){
			if(is.vector(delta,mode="double")){
				if(length(delta)>1){
					if(length(delta)!=ncol(response))
						stop("Length of delta not equal to number of observations per individual")
					else delta <- rep(delta,nrow(response))}}
			else if(is.matrix(delta)){
				if(any(dim(delta)!=dim(response)))
					stop("Dimensions of delta not the same as response")
				delta <- as.vector(t(delta))}
			else stop("delta must be a vector or matrix")}
		y <- as.vector(t(response))}
	else if(is.list(response)){
		if(!is.null(delta)&&!is.vector(delta,mode="double")||length(delta)>1)
			stop("delta must be a scalar when response is a list")
		if(!is.null(totals)){
			if(!is.vector(totals,mode="double")&&!is.vector(totals,mode="integer"))stop("totals must be a scalar or vector when response is a list")
			else totv <- T}
		else totv <- F
		if(is.null(censor)){
			times <- NULL
			tot <- del <- cen <- nes <- 0
			ncols <- ncol(as.matrix(response[[1]]))
			if(ncols<2)stop("matrices must have at least 2 columns: responses and times")
			else if(ncols>2)for(j in response){
				j <- as.matrix(j)
				for(k in 3:ncols){
					if(is.null(censor)&&any(j[,k]<=0,na.rm=T))
						cen <- k
					else if(any(j[,k]>1,na.rm=T)&&all(j[,k]==trunc(j[,k]),na.rm=T)){
					     if(all(j[,k]>=j[,1],na.rm=T))tot <- k
						else nes <- k}
					else if(is.null(delta)&&all(j[,k]>0,na.rm=T))del <- k}
				if((ncols==3&&(nes>0||cen>0||del>0||tot>0))||(ncols==4&&((nes>0&&cen>0)||(nes>0&&del>0)||(nes>0&&tot>0)||(cen>0&&del>0)))||(ncols>=5&&((nes>0&&cen>0&&del>0)||(nes>0&&tot>0))))break}
			for(i in response){
				i <- as.matrix(i)
				nind <- nind+1
				if(ncol(i)!=ncols)
					stop(paste("Individual ",nind,"does not have a",ncols,"column matrix"))
				if(nes==0&&any(diff(i[,2])<0,na.rm=T))
					stop(paste("Negative time step for individual ",nind))
				nobs <- c(nobs,nrow(i))
				y <- c(y,i[,1])
				times <- c(times,i[,2])
				if(nes>0){
					if(any(diff(i[,nes])!=0&diff(i[,nes])!=1,na.rm=T))
						stop("nest categories for individual ",nind,"are not consecutive increasing integers")
					tnest <- c(tnest,i[,nes])}
				if(!totv&&tot>0)totals <- c(totals,i[,tot])
				if(cen>0)censor <- c(censor,i[,cen])
				if(del>0)delta <- c(delta,i[,del])}}
		else if(!is.vector(censor,mode="double")&&!is.vector(censor,mode="integer"))
			stop("If response is a list, censor must be a vector")
		else {
			tmp <- NULL
			j <- 0
			for(i in response){
				nind <- nind+1
				if(!is.vector(i,mode="double")&&!(is.matrix(i)&&ncol(i)==1))
					stop(paste("Individual ",nind,"does not have a vector or one column matrix"))
				tmp <- c(tmp,rep(1,length(i)-1),censor[j <- j+1])
				y <- c(y,i)
				nobs <- c(nobs,length(i))}
			censor <- tmp}
		if(totv){
			if(length(totals)==1)totals <- rep(totals,length(y))
			else if(length(totals)!=length(y))
			     stop("totals must have one value per response")}}
	else
		stop("Observations must be supplied as a list of matrices, matrix, or dataframe")
	if(!is.null(censor)&&any(censor!=-1&censor!=0&censor!=1,na.rm=T))
		stop("censor must only contain -1, 0, and 1")
	if(!is.null(totals)&&(any(totals<y,na.rm=T)||any(totals<0,na.rm=T)))
		stop("all totals must be positive and >= to responses")
	if(!is.null(tnest)&&(any(tnest<1,na.rm=T)||any(tnest!=trunc(tnest),na.rm=T)))
		stop("nest must contain integers starting at 1")
	if(!is.null(delta)&&any(delta<=0,na.rm=T))
		stop("delta must be strictly positive")
	z <- list(y=y, n=totals, times=times, nobs=nobs, nest=tnest,
	  censor=censor, delta=delta)
	class(z) <- "response"
	z}

print.response <- function(z){
	if(inherits(z,"response")){
	cat("Number of individuals:                ",length(z$nobs),"\n")
	cat("Number of observations:               ",sum(z$nobs),"\n")
	cat("Number of observations per individual:\n",z$nobs,"\n")
	if(is.null(z$n))y <- z$y
	else y <- z$y/z$n
	cat("Mean response:                        ",mean(y,na.rm=T),"\n")
	cat("Range of responses:                   ",range(y,na.rm=T),"\n")
	if(any(is.na(z$y)))
		cat("Number of NAs:                        ",sum(is.na(z$y)),"\n")
	if(!is.null(z$times)){
		cat("Mean time:                            ",mean(z$times,na.rm=T),"\n")
		cat("Range of times:                       ",range(z$times,na.rm=T),"\n")}
	if(!is.null(z$nest))
		cat("Number of clusters:                   ",max(z$nest),"\n")
	if(!is.null(z$censor)) {
		if(sum(z$censor==0,na.rm=T)>0)cat("Number of right-censored observations:",sum(z$censor==0,na.rm=T),"\n")
		if(sum(z$censor==-1,na.rm=T)>0)cat("Number of left-censored observations: ",sum(z$censor==-1,na.rm=T),"\n")}
	if(!is.null(z$delta)&&length(z$delta)==1)
		cat("Unit of measurement:                  ",z$delta,"\n")}}

plot.response <- function(z, subset=NULL, nest=1, add=F, lty=NULL, main=NULL,
	ylim=range(z$y), xlim=range(z$times), xlab="Time", ylab="Response"){
	if(inherits(z,"response")&&!is.null(z$times)){
		tnest <- if(!is.null(z$nest)) z$nest
			else 1
		nm <- rep(1:length(z$nobs),z$nobs)
		j <- 1
		lt <- 0
		if(is.null(subset))subset <- 1:length(z$nobs)
		if(is.null(z$n))y <- z$y
		else y <- z$y/z$n
		for(i in 1:length(z$nobs))if(any(i==subset)){
			if(is.null(lty))lt <- lt%%4+1
			else lt <- lty
			if(!add&&j==1)plot(z$times[nm==i&nest==tnest],
				y[nm==i&nest==tnest],lty=lt,
				type="l",ylim=ylim,xlim=xlim,
				main=main,ylab=ylab,xlab=xlab)
			else lines(z$times[nm==i&nest==tnest],
				y[nm==i&nest==tnest],lty=lt)
			j <- j+1}}}

tvctomat <- function(tvcov, names=NULL, oldtvcov=NULL){
	nbs <- tvcv <- NULL
	if(is.matrix(tvcov)||is.data.frame(tvcov)){
		if(is.data.frame(tvcov))tvcov <- as.matrix(tvcov)
		nbs <- rep(ncol(tvcov),nrow(tvcov))
		tvcv <- matrix(as.vector(t(tvcov)),ncol=1)
		colnames(tvcv) <- if(!is.null(names)) names
		else paste(deparse(substitute(tvcov)))}
	else if(is.list(tvcov)){
		if(!inherits(tvcov,"tvcov")){
			for(i in tvcov){
				i <- as.matrix(i)
				nbs <- c(nbs,dim(i)[1])
				tvcv <- rbind(tvcv,i)}
			if(is.null(colnames(tvcov[[1]]))){
				if(is.null(names))
					names <- paste(deparse(substitute(tvcov)))}
				else names <- colnames(tvcov[[1]])
			if(length(names)==1&&ncol(tvcv)>1){
				tmp <- NULL
				for(i in 1:ncol(tvcv))
					tmp <- c(tmp,paste(names,i,sep=""))
				names <- tmp}
			colnames(tvcv) <- names}
		else if(inherits(tvcov,"tvcov")){
			nbs <- tvcov$nobs
			tvcv <- tvcov$tvcov}}
	else stop("The time-varying covariates must be a matrix, dataframe, or list")
	if(!is.null(oldtvcov)){
		if(!inherits(oldtvcov,"tvcov"))
			stop("oldtvcov must have class, tvcov")
		else if((nrow(oldtvcov$tvcov)==nrow(tvcv))&&
			all(oldtvcov$nobs==nbs))
			oldtvcov$tvcov <- cbind(oldtvcov$tvcov,tvcv)
		else stop("old and new covariates do not have the same number of observations")}
	else {
		oldtvcov <- list(tvcov=tvcv,nobs=nbs)
		class(oldtvcov) <- "tvcov"}
	oldtvcov}

print.tvcov <- function(z){
	if(inherits(z,"tvcov")){
	cat("Number of individuals:            ",length(z$nobs),"\n")
	cat("Number of observations:           ",sum(z$nobs),"\n")
	cat("Number of observations per individual:\n",z$nobs,"\n")
	cat("Number of time-varying covariates:",ncol(z$tvcov),"\n")
	cat("Names of time-varying covariates:\n",colnames(z$tvcov),"\n")}}

tcctomat <- function(ccov, names=NULL, oldtccov=NULL){
	linear <- NULL
	if(is.language(ccov)){
		linear <- ccov
		mt <- terms(ccov)
		mf <- model.frame(mt,sys.frame(sys.parent()),na.action=na.fail)
		ccov <- model.matrix(mt,mf)[,-1,drop=F]}
	else if(is.factor(ccov))stop("Factor variables can only be used in formulae")
	if(is.vector(ccov,mode="double")){
		if(is.null(names))names <- paste(deparse(substitute(ccov)))
		ccov <- matrix(ccov,ncol=1)}
	else if(!is.matrix(ccov))
		stop("Time-constant covariates must be a vector, matrix, or model formula")
	if(is.null(colnames(ccov))){
		if(is.null(names))names <- paste(deparse(substitute(ccov)))
		if(length(names)==1&&ncol(ccov)>1){
			tmp <- NULL
			for(i in 1:ncol(ccov))
				tmp <- c(tmp,paste(names,i,sep=""))
			names <- tmp}
		colnames(ccov) <- names}
	if(!is.null(oldtccov)){
		if(!inherits(oldtccov,"tccov"))
			stop("oldtccov must have class, tccov")
		else if(nrow(oldtccov$ccov)==nrow(ccov))
			oldtccov$ccov <- cbind(oldtccov$ccov,ccov)
		else stop("old and new covariates do not have the same number of individuals")}
	else {
		oldtccov <- list(ccov=ccov, linear=linear)
		class(oldtccov) <- "tccov"}
	oldtccov}

print.tccov <- function(z){
	if(inherits(z,"tccov")){
	cat("Number of individuals:             ",nrow(z$ccov),"\n")
	cat("Number of time-constant covariates:",ncol(z$ccov),"\n")
	cat("Names of time-constant covariates:\n",colnames(z$ccov),"\n")}}

rmna <- function(response, tvcov=NULL, ccov=NULL){
	if(!inherits(response,"response"))
		stop("The response must have class, response.")
	rna <- !is.na(response$y)
	if(!is.null(response$n))rna <- rna&!is.na(response$n)
	for(i in 1:length(response$nobs))
		if(!is.null(ccov)&&any(is.na(ccov$ccov[i,])))rna[covind(response)==i] <- F
	if(!is.null(tvcov)){
		if(!inherits(tvcov,"tvcov"))
			stop("The time-varying covariates must have class, tvcov.")
		if(any(response$nobs!=tvcov$nobs))stop("Numbers of observations for response and time-varying covariates do not agree.")
		for(i in ncol(tvcov$tvcov))rna <- rna&!is.na(tvcov$tvcov[,i])}
	if(!is.null(ccov)){
		if(!inherits(ccov,"tccov"))
			stop("The time-constant covariates must have class, tccov.")
		if(length(response$nobs)!=nrow(ccov$ccov))stop("Numbers of individuals for response and for time-constant covariates do not agree.")}
	response$y <- response$y[rna]
	if(!is.null(response$times))response$times <- response$times[rna]
	if(!is.null(response$totals))response$totals <- response$totals[rna]
	if(!is.null(response$censor))response$censor <- response$censor[rna]
	if(!is.null(response$delta)&&length(response$delta)>1)response$delta <- response$delta[rna]
	if(!is.null(tvcov))tvcov$tvcov <- tvcov$tvcov[rna,,drop=F]
	if(!is.null(response$nest))response$nest <- response$nest[rna]
	tmp <- NULL
	j <- c(0,cumsum(response$nobs))
	for(i in 1:length(response$nobs)){
		tmp <- c(tmp,sum(rna[(j[i]+1):j[i+1]]))
		if(tmp[i]==0){
			warning(paste("Individual",i,"has no observations"))}}
	response$nobs <- tmp[tmp>0]
	if(!is.null(ccov))ccov$ccov <- ccov$ccov[tmp>0,,drop=F]
	if(!is.null(tvcov))tvcov$nobs <- tmp[tmp>0]
	z <- list(response=response,tvcov=tvcov,ccov=ccov)
	class(z) <- "repeated"
	z}

print.repeated <- function(z){
	if(inherits(z,"repeated")){
		cat("Response variable:\n\n")
		print.response(z$response)
		if(!is.null(z$ccov)){
			cat("\nTime-constant covariates:\n\n")
			print.tccov(z$ccov)}
		if(!is.null(z$tvcov)){
			cat("\nTime-varying covariates:\n\n")
			print.tvcov(z$tvcov)}}}

plot.repeated <- function(z, variable="response", number=1, subset=NULL,
	add=F, lty=NULL, main=NULL, ylim=range(z$response$y),
	xlim=range(z$response$times), xlab="Time", ylab="Response"){
	variable <- match.arg(variable,c("response","time-varying covariate"))
	if(inherits(z,"repeated")){
		if(variable=="response")
			plot.response(z$response, subset=subset,
				add=add, lty=lty, main=main, ylim=ylim,
				xlim=xlim, xlab=xlab, ylab=ylab)
		else if(variable=="time-varying covariate"){
			if(number>ncol(z$tvcov$tvcov))
				stop("Less than",number,"covariates")
			if(missing(ylab))ylab <- colnames(z$tvcov$tvcov)[number]
			zz <- list()
			zz$times <- z$response$times
			zz$y <- z$tvcov$tvcov[,number]
			zz$nobs <- z$tvcov$nobs
			if(missing(ylim))ylim <- range(zz$y)
			class(zz) <- "response"
			plot.response(zz, subset=subset,
				add=add, lty=lty, main=main, ylim=ylim,
				xlim=xlim, xlab=xlab, ylab=ylab)}}}
