#-*- S -*-  

# Chapter 9   Non-linear Models

# first=T for deriv3 calculations of lmix2, plot.profile
library(MASS, first=T)
# nlme for self-starting nls()
if(version$major==3 && version$minor < 4) library(nlme, first=T)
trellis.device(postscript, file="ch09.ps", width=8, height=6, pointsize=9)
options(width=65, digits=5)


attach(wtloss)
# alter margin 4; others are default
oldpar <- par(mar=c(5.1, 4.1, 4.1, 4.1))
plot(Days, Weight, type="p",ylab="Weight (kg)")
Wt.lbs <- pretty(range(Weight*2.205))
axis(side=4, at=Wt.lbs/2.205, lab=Wt.lbs, srt=90)
mtext("Weight (lb)", side=4, line=3)
par(oldpar) # restore settings
detach()


# 9.1  Fitting non-linear regression models

wtloss.st <- c(b0=90, b1=95, th=120)
wtloss.fm <- nls(Weight ~ b0 + b1*2^(-Days/th),
    data = wtloss, start = wtloss.st, trace = T)
wtloss.fm

expn <- function(b0, b1, th, x)
{
    temp <- 2^(-x/th)
    model.func <- b0 + b1 * temp
    Z <- cbind(1, temp, (b1 * x * temp * log(2))/th^2)
    dimnames(Z) <- list(NULL, c("b0","b1","th"))
    attr(model.func, "gradient") <- Z
    model.func
}

wtloss.gr <- nls(Weight ~ expn(b0, b1, th, Days),
    data = wtloss, start = wtloss.st, trace = T)

expn1 <- deriv(y ~ b0 + b1 * 2^(-x/th), c("b0", "b1", "th"),
               function(b0, b1, th, x) {})

negexp <- selfStart(model = ~ b0 + b1*exp(-x/th), 
    initial = negexp.ival, parameters = c("b0", "b1", "th"),
    template = function(x, b0, b1, th) {})

wtloss.ss <- nls(Weight ~ negexp(Days, B0, B1, theta), wtloss, trace = T)


# 9.2  Non-linear fitted model objects and method functions

summary(wtloss.gr)
deviance(wtloss.gr)
vcov(wtloss.gr)


# 9.3  Taking advantage of linear parameters

wtloss.pl <- nls(Weight ~ cbind(1, 2^(-Days/th)),
             data=wtloss, algorithm="plinear", 
	     start = c(th=120), trace=T)
summary(wtloss.pl)


# 9.4  Confidence intervals for parameters

confint <- function(object, ...) UseMethod("confint")

confint.nls <- function(object, parm, level = 0.95)
{
  if(is.character(parm))
    parm <- match(parm, names(coef(object)), nomatch = 0)
  pro <- profile(object, which = parm, 
                 alphamax = (1 - level)/4)[[parm]]
  x <- pro[, "par.vals"][, parm]
  y <- pro$tau
  n <- length(object$fitted.values) - length(object$parameters)
  cutoff <- qt(1 - (1 - level)/2, n)
  if(max(y) < cutoff || min(y) >  - cutoff)
    stop("the profiling did not extend far enough")
  sp <- spline(x, y)
  approx(sp$y, sp$x, xout = c( - cutoff, cutoff))$y
}

expn2 <- deriv(~b0 + b1*((w0 - b0)/b1)^(x/d0), 
         c("b0","b1","d0"), function(b0, b1, d0, x, w0) {})

wtloss.init <- function(obj, w0)
{
  p <- coef(obj)
  d0 <-  - log((w0 - p["b0"])/p["b1"], 2) * p["th"]
  c(p[c("b0", "b1")], d0 = as.vector(d0))
}

out <- NULL
w0s <- c(110, 100, 90)
for(w0 in w0s) {
    fm <- nls(Weight ~ expn2(b0, b1, d0, Days, w0),
              wtloss, start = wtloss.init(wtloss.gr, w0))
    out <- rbind(out, c(coef(fm)["d0"], confint(fm, "d0")))
  }
dimnames(out) <- list(paste(w0s, "kg:"),  c("d0", "low", "high"))
out


attach(stormer)
fm0 <- lm(Wt*Time ~ Viscosity + Time - 1,  data=stormer)
b0 <- coef(fm0)
names(b0) <- c("b1", "b2")
b0
storm.fm <- nls(Time ~ b1*Viscosity/(Wt-b2), data=stormer, start=b0,
          trace=T)

bc <- coef(storm.fm)
se <- sqrt(diag(vcov(storm.fm)))
dv <- deviance(storm.fm)
par(pty = "s")
b1 <- bc[1] + seq(-3*se[1], 3*se[1], length = 51)
b2 <- bc[2] + seq(-3*se[2], 3*se[2], length = 51)
bv <- expand.grid(b1, b2)
ssq <- function(b)
       sum((Time - b[1] * Viscosity/(Wt-b[2]))^2)
dbetas <- apply(bv, 1, ssq)
cc <- matrix(Time - rep(bv[,1],rep(23, 2601)) *
       Viscosity/(Wt - rep(bv[,2], rep(23, 2601))), 23)
dbetas <- matrix(drop(rep(1, 23) %*% cc^2), 51)
fstat <- matrix( ((dbetas - dv)/2) / (dv/21), 51, 51)
qf(0.95, 2, 21)
plot(b1, b2, type="n")
lev <- c(1,2,5,7,10,15,20)
contour(b1, b2, fstat, levels=lev, labex=0.75, lty=2, add=T)
contour(b1, b2, fstat, levels=qf(0.95,2,21), add=T, labex=0)
text(31.6,0.3,"95% CR", adj=0, cex=0.75)
points(bc[1], bc[2], pch=3, mkh=0.1)
detach()

if(version$major >= 4) {
  storm.fm <- nls(Time ~ b*Viscosity/(Wt - c), stormer,
     start = c(b=29.401, c=2.2183))
  storm.bf <- function(rs) {
     assign("Tim", fitted(storm.fm) + rs, frame = 1)
     nls(Tim ~ (b * Viscosity)/(Wt - c), stormer,
              start = coef(storm.fm))$parameters
  }
  rs <- scale(resid(storm.fm), scale = F)
  storm.boot <- bootstrap(rs, storm.bf, B=1000)
  print(summary(storm.fm)$parameters)
  print(storm.boot)
  print(limits.emp(storm.boot))
}


# 9.5  Assessing the linear approximation

expn3 <- deriv3(~ b0 + b1*2^(-x/th), c("b0","b1","th"),
      function(x, b0, b1, th) {})
wtloss.he <- nls(Weight ~ expn3(Days, b0, b1, th),
      wtloss, start = coef(wtloss.gr))
rms.curv(wtloss.he)

par(pty="m")
plot(profile(wtloss.gr))


# 9.6  Constrained non-linear regression

attach(cats)
Bwt <- Bwt[Sex=="F"]
Hwt <- Hwt[Sex=="F"]
nnls.fit(cbind(1, -1, Bwt), Hwt)
rm(Bwt, Hwt)
detach()

attach(wtloss)
nlregb(nrow(wtloss), c(90,95,120), function(x) 
  Weight-x[1]-x[2]*2^(-Days/x[3]), lower=rep(0,3))$parameters
detach()

wtloss.r <- function(x, Weight, Days)
    Weight - x[1] - x[2] * 2^(-Days/x[3])
wtloss.rg <- function(x, Weight, Days)
{
    temp <- 2^(-Days/x[3])
    -cbind(1, temp, x[2]*Days*temp*log(2)/x[3]^2)
}
wtloss.nl <- nlregb(nrow(wtloss), c(90, 95, 120), 
   wtloss.r,  wtloss.rg, lower = rep(0,3), 
   Weight = wtloss$Weight, Days = wtloss$Days)

vcov1 <- function(object)
{
   gr <- object$jacobian
   df <- length(object$resid) - length(object$param)
   sum(object$resid^2)/df * solve(t(gr) %*% gr)
}

sqrt(diag(vcov1(wtloss.nl)))
sqrt(diag(vcov.nlregb(wtloss.nl, method="Fisher")))
sqrt(diag(vcov.nlregb(wtloss.nl, method="observed")))
sqrt(diag(vcov.nlregb(wtloss.nl, method="Huber")))

wtloss.nl0 <-  nlregb(nrow(wtloss), c(90,95,120), wtloss.r, 
         lower = rep(0,3), Weight = wtloss$Weight, Days = wtloss$Days)
sqrt(diag(vcov.nlregb(wtloss.nl0)))


# 9.7  General optimization and maximum likelihood estimation

attach(faithful)
truehist(waiting, xlim=c(35,105), ymax=0.045)
width.SJ(waiting)
wait.dns <- density(waiting, 200, width=10)
lines(wait.dns, lty=2)
lmix2 <- deriv3(
     ~ -log(p*dnorm((x-u1)/s1)/s1 + (1-p)*dnorm((x-u2)/s2)/s2),
     c("p", "u1", "s1", "u2", "s2"), 
     function(x, p, u1, s1, u2, s2) NULL)

p0 <- c(p=mean(waiting < 70), u1=50, s1=5, u2=80, s2=5)
p0
tr.ms <- function(info, theta, grad, scale, flags, fit.pars)
{
    cat(round(info[3], 3), ":", signif(theta), "\n")
    invisible()
}
wait.mix2 <- ms(~ lmix2(waiting, p, u1, s1, u2, s2),
    start=p0, data = faithful, trace = tr.ms)
dmix2 <- function(x, p, u1, s1, u2, s2)
             p * dnorm(x, u1, s1) + (1-p) * dnorm(x, u2, s2)
cf <- coef(wait.mix2)
attach(structure(as.list(cf), names = names(cf)))
wait.fdns <- list(x = wait.dns$x, 
                  y = dmix2(wait.dns$x, p, u1, s1, u2, s2))
lines(wait.fdns)
par(usr = c(0,1,0,1))
legend(0.1, 0.9, c("Normal mixture", "Nonparametric"), 
    lty = c(1,2), bty = "n")


pmix2 <- deriv(~ p*pnorm((x-u1)/s1) + (1-p)*pnorm((x-u2)/s2),
               "x", function(x, p, u1, s1, u2, s2) {})
pr0 <- (seq(along = waiting) - 0.5)/length(waiting)
x0 <- x1 <- as.vector(sort(waiting)) ; del <- 1; i <- 0
while((i <- 1 + 1) < 10 && abs(del) > 0.0005) {
    pr <- pmix2(x0, p, u1, s1, u2, s2)
    del <- (pr - pr0)/attr(pr, "gradient")
    x0 <- x0 - del
    cat(format(del <- max(abs(del))), "\n")
}
detach(2)

par(pty = "s")
plot(x0, x1, xlim = range(x0, x1), ylim = range(x0, x1),
    xlab = "Model quantiles", ylab = "Waiting time")
abline(0,1)

vmat <- summary(wait.mix2)$Information
cbind(coef(wait.mix2), sqrt(diag(vmat)))
as.vector((cf["s1"] - cf["s2"]) /
          sqrt(c(0,0,1,0,1) %*% vmat %*% c(0,0,1,0,1)))
wait.mix2e <- ms(~ -log(p * dnorm(waiting, u1, s1) +
   (1-p) * dnorm(waiting, u2, s1)), start = cf[-5],
   data = faithful)
coef(wait.mix2e)
2*(wait.mix2e$value - wait.mix2$value)

mix.f <- function(p)
{
   e <- p[1]*dnorm((waiting-p[2])/p[3])/p[3] + 
        (1-p[1])*dnorm((waiting-p[4])/p[5])/p[5]
   -sum(log(e))
}
waiting.init <- c(mean(waiting < 70), 50, 5, 80, 5)
nlmin(mix.f, waiting.init, print.level=1)

mix.obj <- function(p, x)
{
   e <- p[1]*dnorm((x-p[2])/p[3])/p[3] + 
        (1-p[1])*dnorm((x-p[4])/p[5])/p[5]
   -sum(log(e))
}
mix.nl0 <- nlminb(waiting.init,  mix.obj, 
   scale = c(10, rep(1,4)), lower = c(0, -Inf, 0, -Inf, 0),
   upper = c(1, rep(Inf, 4)), x = waiting)

lmix2a <- deriv(
     ~ -log(p*dnorm((x-u1)/s1)/s1 + (1-p)*dnorm((x-u2)/s2)/s2),
     c("p", "u1", "s1", "u2", "s2"), 
     function(x, p, u1, s1, u2, s2) NULL)
mix.gr <- function(p, x) 
{
# workaround for bug in 4.0
   u1 <- p[2]; s1 <- p[3]; u2 <- p[4]; s2 <- p[5]; p <- p[1]
   e <- lmix2a(x, p, u1, s1, u2, s2)
   rep(1, length(x)) %*% attr(e, "gradient")
}
mix.nl1 <- nlminb(waiting.init, mix.obj, mix.gr, 
   scale = c(10, rep(1,4)), lower = c(0, -Inf, 0, -Inf, 0),
   upper = c(1, rep(Inf, 4)), x = waiting)

mix.grh <- function(p, x) 
{
   e <- lmix2(x, p[1], p[2], p[3], p[4], p[5])
   g <- attr(e, "gradient")
   g <- rep(1, length(x)) %*% g
   H <- apply(attr(e, "hessian"), c(2,3), sum)
   list(gradient=g, hessian=H[row(H) <= col(H)])
}
mix.nl2 <- nlminb(waiting.init, mix.obj, mix.grh, T, 
   scale = c(10, rep(1,4)), lower = c(0, -Inf, 0, -Inf, 0),
   upper = c(1, rep(Inf, 4)), x = waiting)

sqrt(diag(vcov.nlminb(mix.nl0)))
sqrt(diag(vcov.nlminb(mix.nl1)))
sqrt(diag(vcov.nlminb(mix.nl2)))

detach()

AIDSfit <- function(y, z, start=rep(mean(y), ncol(z)), ...)
{
   deviance <- function(beta, y, z)
   {
       mu <- z %*% beta
       2 * sum(mu - y - y*log(mu/y))
   }
   grad <- function(beta, y, z)
   {
       mu <- z %*% beta
       2 * t(1 - y/mu) %*% z
   }
   nlminb(start, deviance, grad, lower = 0, y = y, z = z, ...)
}

Y <- scan(n=13)
12 14 33 50 67 74 123 141 165 204 253 246 240

library(nnet) # for class.ind
s <- seq(0, 13.999, 0.01); tint <- 1:14
X <- expand.grid(s, tint)
Z <- matrix(pweibull(pmax(X[,2] - X[,1],0), 2.5, 10),length(s))
Z <- Z[,2:14] - Z[,1:13]
Z <- t(Z) %*% class.ind(factor(floor(s/2))) * 0.01
round(AIDSfit(Y, Z)$param)
rm(s, X, Y, Z)


# =========================================

# Chapter 9 Complements


# 9.4  Confidence intervals for parameters

if(version$major >= 4) {
  storm.boot <- bootstrap(rs, storm.bf, seed=101, B=1000)
  print(summary(storm.boot))

  print(jack.after.bootstrap(storm.boot, "Bias"))
}

# End of ch09
