##### Script for comparing performance of weighted least squares (minimum chi-square)
##### estimation of parameters of a gamma distribution with maximum likelihood estimation.
#####
library(gplots) # load library needed for function plotCI - plot points with error bars
library(MASS) # load library needed for function fitdistr - fit a model to an iid sample by max lik
data <- rgamma(1000,shape=3,rate=0.5) # a random sample of size 1000 from the gamma(3,0.5) distribution
log.data <- log(data) # logarithms of the data
log.data.hist <- hist(log.data,breaks=25,plot=FALSE) # bin the log data, 25 bins
counts <- log.data.hist$counts # bin counts
mids <- log.data.hist$mids # bin midpoints
mids <- mids[counts>0] # bin midpoints of bins with positive count
counts<-counts[counts>0] # counts of bins with positive count
plotCI(mids,log(counts),uiw=1/(sqrt(counts))) # plot of log counts against log data bin midpoints
# error bars equal +/- 1 standard deviation
coefs <- lm(log(counts)~mids+exp(mids),weights=counts)$coef
# fit log counts=a+b midpoints+c exp(midpoints)
# by weighted least square, weights=counts
lines(mids,coefs[1]+coefs[2]*mids+coefs[3]*exp(mids))
# plot fitted line
# Now I am going to repeat something like the previous 1000 times (replicates).
# For each replicate I'll compute MLE, LSE (least squares estimator, unweighted),
# WLSE (weighted least squares estimator, weights estimated from LSE)
nrep <- 1000 # number of replicates
rep <- 0 # counter
shape.lse <- numeric(nrep) # holder for "nrep" LSE of shape parameter
rate.lse <- numeric(nrep)
shape.wlse <- numeric(nrep)
rate.wlse <- numeric(nrep)
shape.mle <- numeric(nrep)
rate.mle <- numeric(nrep)
while (rep < nrep) {
data <- rgamma(1000,shape=3,rate=0.5) # take a new sample
coefs.mle <- fitdistr(data,"gamma")$estimate # compute MLE
log.data <- log(data) # log of data
log.data.hist <- hist(log.data,breaks=20,plot=FALSE)
counts <- log.data.hist$counts
mids <- log.data.hist$mids
mids <- mids[counts>0] # bin midpoints of binned data, bins with positive counts only
counts<-counts[counts>0] # counts of binned data, bin with positive counts only
fitted.lse <- lm(log(counts)~mids+I(-exp(mids)))
# Note the I(-exp(mids)). This means that the transformation - exp..
# is carried out and the resulting variable is used as explanatory
# variable in the least squares fit. Without I(...), the minus sign is
# interpreted to mean that we don't want this variable in the model,
# as opposed to +, which emans we do want this variable in the model.
# The model description "log(counts)~mids+I(-exp(mids))" is not a piece
# of R code (R functions, expressions) but a model formula, a string of text,
# which is parsed according to some different rules! Try help(model) to
# learn about this.
coefs.lse <- fitted.lse$coef
fits.lse <- fitted.lse$fit
fitted.wlse <- lm(log(counts)~mids+I(-exp(mids)),weights=exp(fits.lse))
coefs.wlse <- fitted.wlse$coef
rep <- rep+1
shape.lse[rep] <- coefs.lse["mids"]
rate.lse[rep] <- coefs.lse["I(-exp(mids))"]
shape.wlse[rep] <- coefs.wlse["mids"]
rate.wlse[rep] <- coefs.wlse["I(-exp(mids))"]
shape.mle[rep] <- coefs.mle["shape"]
rate.mle[rep] <- coefs.mle["rate"]
}
# make another plot of last replicate
plotCI(mids,log(counts),uiw=1/(sqrt(counts)))
fits.wlse <- fitted.wlse$fit
lines(mids,fits.wlse)
sqrt(mean((shape.mle-3)^2)) # root mean square error of MLE, shape parameter
sqrt(mean((shape.wlse-3)^2)) # root mean square error of WLSE, shape parameter
sqrt(mean((rate.mle-0.5)^2)) # root mean square error of MLE, rate parameter
sqrt(mean((rate.wlse-0.5)^2)) # root mean square error of WLSE, rate parameter