############### Bayesian Model Averaging examples ###############
#
# This script assumes you have worked through all the previous notes from
# the web page and you have downloaded, installed, and updated all available
# R packages.

### Load the SAS SEM example data ("SEMData.sav"), which is SIMULATED DATA.

library(foreign)
exsem <- read.spss("http://www.unt.edu/rss/class/Jon/R_SC/Module10/SEMData.sav",
  use.value.labels=TRUE, max.value.labels=Inf, to.data.frame=TRUE)
summary(exsem)
head(exsem)
cor(exsem)

# The goal of this example is to choose the best set of predictor variables for a linear (OLS)
# prediction model with Extroversion (extro) as the outcome.

# Basic linear (OLS) regression model.

reg.1 <- lm(extro ~ abstruse + agree + block + cognitive + common + cultural + open +
  physical + series + sets + social + vocab, data=exsem)
summary(reg.1)

# The BMA (Bayesian Model Averaging) package/library was designed specifically to use
# Bayesian Model Averaging to address the variable selection problem in the context of
# several types of models (e.g. GLM, LM, and survival models).

library(BMA)

# The function for conducting BMA with respect to linear regression is 'bicreg'.
# The 'bicreg' function requires a matrix of predictor variables as input.

attach(exsem)
predictors <- as.matrix(cbind(open, agree, social, cognitive, physical, cultural,
  vocab, abstruse, block, common, sets, series))
detach(exsem)

# Conduct the BMA using the 'bicreg' function by submitting the matrix of predictors
# (predictors) and the outcome variable (exsem$extro).

bma1 <- bicreg(predictors, exsem$extro)
summary(bma1)

# Based on the first column of the output we can see that "open", "agree", and "series" are
# the most important variables; the 'p!=0' column gives the probability (as a percentage)
# that the coefficient for a given predictor is NOT zero. We can also see that the first
# model ('model 1', which includes only 'open', 'agree', & 'series') is the best, because it
# has the lowest BIC and the largest posterior probability.

# The 'ols' part of the output (not printed by default) gives a matrix, with each model as a
# row and each predictor variable as a column, listing the estimated (OLS) coefficient for
# each variable in a given model.

bma1$ols

# Likewise, the 'se' part of the output produces a similar matrix with the standard errors
# for each coefficient (for each variable/model combination).

bma1$se

# The 'postmean' part of the output (not printed by default) contains the average posterior
# coefficient for each predictor, and 'postsd' provides the standard deviation of each
# average posterior coefficient.

bma1$postmean
bma1$postsd

# The 'which' part of the output (not printed by default) contains a matrix, with each model
# as a row and each predictor variable as a column, listing whether a variable was included
# in each model.

bma1$which

# The BMA package also contains a plot function for displaying the posterior distributions
# of the coefficients.

plot(bma1)

# For a complete description of the 'bicreg' function:

help(bicreg)
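# A few other pieces of the fitted object are worth inspecting directly (a minimal sketch;
# 'probne0', 'postprob', and 'imageplot.bma' are all documented in the BMA package).
# 'probne0' holds the same inclusion probabilities reported in the 'p!=0' column,
# 'postprob' holds the posterior probability of each retained model, and 'imageplot.bma'
# draws an image plot showing which predictors appear in which models.

bma1$probne0
bma1$postprob
imageplot.bma(bma1)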
# Bayesian model averaging can also be conducted when attempting to identify the best set of
# predictors for a Generalized Linear Model. The 'bic.glm' function is very similar to
# 'bicreg'; you must supply a matrix or data frame of the independent variables (predictors)
# and the outcome variable. Results of the following model mirror those above. However, the
# obvious benefit of the 'bic.glm' function is the ability to specify non-normal error
# distributions (i.e. non-Gaussian, e.g. binomial).

bma2 <- bic.glm(predictors, exsem$extro, glm.family = "gaussian")
summary(bma2)

# Notice that when specifying "gaussian" the estimation of the posterior standard deviations
# is slightly off; therefore, it is best to use 'bicreg' when the family is Gaussian.

bma1$postsd
bma2$postsd

plot(bma2)

# For a complete description of the 'bic.glm' function and its arguments:

help(bic.glm)
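# As a further (hedged) comparison of the two functions, the model-averaged coefficients can
# be printed side by side. This sketch assumes both 'postmean' vectors store the intercept
# first, followed by the 12 predictors in the same order.

round(cbind(bicreg = bma1$postmean, bic.glm = bma2$postmean), 4)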
### Example of Binomial Logistic Regression using 'bic.glm'.

# Read in the data:

logreg <- read.table("http://www.unt.edu/rss/class/Jon/R_SC/Module9/logreg1.txt",
  header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE)
summary(logreg)

# Create a matrix of the predictor variables.

attach(logreg)
predictors.logist <- as.matrix(cbind(x1, x2, x3, x4))
detach(logreg)

# Run the 'bic.glm' function, specifying the binomial family. The result is assigned to a
# new object ('bma3') so it does not overwrite the Gaussian model above.

bma3 <- bic.glm(predictors.logist, logreg$y, glm.family = "binomial")
summary(bma3)
plot(bma3)
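# As with 'bicreg', the fitted 'bic.glm' object stores its inclusion probabilities, and the
# model space can be visualized (a small sketch; per the BMA package documentation,
# 'probne0' is a component of 'bic.glm' output and 'imageplot.bma' accepts 'bic.glm'
# objects).

bma3$probne0
imageplot.bma(bma3)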
### Example of Multinomial Logistic Regression using library 'mlogitBMA' and function 'bic.mlogit'.

library(mlogitBMA)

# Read in the data from the web (the data is an SPSS .sav file, so the 'foreign' package is
# necessary).

library(foreign)
mdata1 <- read.spss("http://www.unt.edu/rss/class/Jon/R_SC/Module9/MultiNomReg.sav",
  use.value.labels=TRUE, max.value.labels=Inf, to.data.frame=TRUE)
summary(mdata1)

# Apply the 'bic.mlogit' function, supplying the formula in standard format; the 'choices'
# argument specifies the categories of the outcome variable.

mlog.1 <- bic.mlogit(y ~ x1 + x2 + x3, data = mdata1, choices = 1:3)
summary(mlog.1)

# To see all the arguments of the 'bic.mlogit' function:

help(bic.mlogit)

# End; Feb, 2011