##### Script for comparing performance of weighted least squares (minimum chi-square) ##### estimation of parameters of a gamma distribution with maximum likelihood estimation. ##### library(gplots) # load library needed for function plotCI - plot points with error bars library(MASS) # load library needed for function fitdistr - fit a model to an iid sample by max lik data <- rgamma(1000,shape=3,rate=0.5) # a random sample of size 1000 from the gamma(3,0.5) distribution log.data <- log(data) # logarithms of the data log.data.hist <- hist(log.data,breaks=25,plot=FALSE) # bin the log data, 25 bins counts <- log.data.hist$counts # bin counts mids <- log.data.hist$mids # bin midpoints mids <- mids[counts>0] # bin midpoints of bins with positive count counts<-counts[counts>0] # counts of bins with positive count plotCI(mids,log(counts),uiw=1/(sqrt(counts))) # plot of log counts against log data bin midpoints # error bars equal +/- 1 standard deviation coefs <- lm(log(counts)~mids+exp(mids),weights=counts)$coef # fit log counts=a+b midpoints+c exp(midpoints) # by weighted least square, weights=counts lines(mids,coefs[1]+coefs[2]*mids+coefs[3]*exp(mids)) # plot fitted line # Now I am going to repeat something like the previous 1000 times (replicates). # For each replicate I'll compute MLE, LSE (least squares estimator, unweighted), # WLSE (weighted least squares estimator, weights estimated from LSE) nrep <- 1000 # number of replicates rep <- 0 # counter shape.lse <- numeric(nrep) # holder for "nrep" LSE of shape parameter rate.lse <- numeric(nrep) shape.wlse <- numeric(nrep) rate.wlse <- numeric(nrep) shape.mle <- numeric(nrep) rate.mle <- numeric(nrep) while (rep < nrep) { data <- rgamma(1000,shape=3,rate=0.5) # take a new sample coefs.mle <- fitdistr(data,"gamma")$estimate # compute MLE log.data <- log(data) # log of data log.data.hist <- hist(log.data,breaks=20,plot=FALSE) counts <- log.data.hist$counts mids <- log.data.hist$mids mids <- mids[counts>0] # bin midpoints of binned data, bins with positive counts only counts<-counts[counts>0] # counts of binned data, bin with positive counts only fitted.lse <- lm(log(counts)~mids+I(-exp(mids))) # Note the I(-exp(mids)). This means that the transformation - exp.. # is carried out and the resulting variable is used as explanatory # variable in the least squares fit. Without I(...), the minus sign is # interpreted to mean that we don't want this variable in the model, # as opposed to +, which emans we do want this variable in the model. # The model description "log(counts)~mids+I(-exp(mids))" is not a piece # of R code (R functions, expressions) but a model formula, a string of text, # which is parsed according to some different rules! Try help(model) to # learn about this. coefs.lse <- fitted.lse$coef fits.lse <- fitted.lse$fit fitted.wlse <- lm(log(counts)~mids+I(-exp(mids)),weights=exp(fits.lse)) coefs.wlse <- fitted.wlse$coef rep <- rep+1 shape.lse[rep] <- coefs.lse["mids"] rate.lse[rep] <- coefs.lse["I(-exp(mids))"] shape.wlse[rep] <- coefs.wlse["mids"] rate.wlse[rep] <- coefs.wlse["I(-exp(mids))"] shape.mle[rep] <- coefs.mle["shape"] rate.mle[rep] <- coefs.mle["rate"] } # make another plot of last replicate plotCI(mids,log(counts),uiw=1/(sqrt(counts))) fits.wlse <- fitted.wlse$fit lines(mids,fits.wlse) sqrt(mean((shape.mle-3)^2)) # root mean square error of MLE, shape parameter sqrt(mean((shape.wlse-3)^2)) # root mean square error of WLSE, shape parameter sqrt(mean((rate.mle-0.5)^2)) # root mean square error of MLE, rate parameter sqrt(mean((rate.wlse-0.5)^2)) # root mean square error of WLSE, rate parameter