I have a data frame that looks like this:
y s
1 cat
0 cat
0 dog
0 dog
0 dog
0 dog
0 dog
0 dog
0 dog
1 dog
1 cat
0 dog
0 dog
1 dog
0 dog
1 dog
0 dog
0 dog
0 dog
0 dog
0 dog
0 dog
1 cat
1 dog
0 cat
1 dog
0 dog
1 dog
1 cat
1 dog
1 dog
1 dog
1 cat
1 cat
1 dog
1 dog
0 cat
1 dog
0 dog
0 cat
1 dog
1 dog
0 dog
1 dog
0 cat
1 dog
1 dog
1 cat
1 cat
1 dog
0 dog
1 cat
1 dog
0 dog
0 dog
1 dog
1 cat
1 dog
0 dog
0 dog
0 dog
0 dog
1 cat
1 dog
1 dog
1 dog
1 cat
1 cat
1 dog
1 dog
0 cat
1 dog
0 dog
0 cat
1 dog
1 dog
0 dog
1 dog
0 cat
1 dog
1 dog
1 cat
1 cat
1 dog
0 dog
1 cat
1 dog
0 dog
0 dog
1 dog
1 cat
1 dog
0 dog
0 dog
0 dog
0 dog
1 cat
I am performing an MCMC algorithm using jags but I always get the error:
Error in jags.model("TEMPmodel.txt", data = dataList, inits = initsList, : RUNTIME ERROR: Compilation error on line 4. Index out of range taking subset of theta
This is the source file:
Ssubj-Mbernbeta.R
# Accompanies the book:
# Kruschke, J. K. (2014). Doing Bayesian Data Analysis:
# A Tutorial with R, JAGS, and Stan. 2nd Edition. Academic Press / Elsevier.
source("DBDA2E-utilities.R")
#===============================================================================
genMCMC = function( data , numSavedSteps=50000 , saveName=NULL ) {
require(rjags)
#-----------------------------------------------------------------------------
# THE DATA.
# N.B.: This function expects the data to be a data frame,
# with one component named y being a vector of integer 0,1 values,
# and one component named s being a factor of subject identifiers.
y = data$y
s = as.numeric(data$s) # converts character to consecutive integer levels
# Do some checking that data make sense:
if ( any( y!=0 & y!=1 ) ) { stop("All y values must be 0 or 1.") }
Ntotal = length(y)
Nsubj = length(unique(s))
# Specify the data in a list, for later shipment to JAGS:
dataList = list(
y = y ,
s = s ,
Ntotal = Ntotal ,
Nsubj = Nsubj
)
#-----------------------------------------------------------------------------
# THE MODEL.
modelString = "
model {
for ( i in 1:Ntotal ) {
y[i] ~ dbern( theta[s[i]] )
}
for ( sIdx in 1:Nsubj ) {
theta[sIdx] ~ dbeta( 2 , 2 ) # N.B.: 2,2 prior; change as appropriate.
}
}
"
# close quote for modelString
writeLines( modelString , con="TEMPmodel.txt" )
#-----------------------------------------------------------------------------
# INTIALIZE THE CHAINS.
# Initial values of MCMC chains based on data:
# Option 1: Use single initial value for all chains:
# thetaInit = rep(0,Nsubj)
# for ( sIdx in 1:Nsubj ) { # for each subject
# includeRows = ( s == sIdx ) # identify rows of this subject
# yThisSubj = y[includeRows] # extract data of this subject
# thetaInit[sIdx] = sum(yThisSubj)/length(yThisSubj) # proportion
# }
# initsList = list( theta=thetaInit )
# Option 2: Use function that generates random values near MLE:
initsList = function() {
thetaInit = rep(0,Nsubj)
for ( sIdx in 1:Nsubj ) { # for each subject
includeRows = ( s == sIdx ) # identify rows of this subject
yThisSubj = y[includeRows] # extract data of this subject
resampledY = sample( yThisSubj , replace=TRUE ) # resample
thetaInit[sIdx] = sum(resampledY)/length(resampledY)
}
thetaInit = 0.001+0.998*thetaInit # keep away from 0,1
return( list( theta=thetaInit ) )
}
#-----------------------------------------------------------------------------
# RUN THE CHAINS
parameters = c( "theta") # The parameters to be monitored
adaptSteps = 500 # Number of steps to adapt the samplers
burnInSteps = 500 # Number of steps to burn-in the chains
nChains = 4 # nChains should be 2 or more for diagnostics
thinSteps = 1
nIter = ceiling( ( numSavedSteps * thinSteps ) / nChains )
# Create, initialize, and adapt the model:
jagsModel = jags.model( "TEMPmodel.txt" , data=dataList , inits=initsList ,
n.chains=nChains , n.adapt=adaptSteps )
# Burn-in:
cat( "Burning in the MCMC chain...\n" )
update( jagsModel , n.iter=burnInSteps )
# The saved MCMC chain:
cat( "Sampling final MCMC chain...\n" )
codaSamples = coda.samples( jagsModel , variable.names=parameters ,
n.iter=nIter , thin=thinSteps )
# resulting codaSamples object has these indices:
# codaSamples[[ chainIdx ]][ stepIdx , paramIdx ]
if ( !is.null(saveName) ) {
save( codaSamples , file=paste(saveName,"Mcmc.Rdata",sep="") )
}
return( codaSamples )
} # end function
#===============================================================================
smryMCMC = function( codaSamples , compVal=0.5 , rope=NULL ,
compValDiff=0.0 , ropeDiff=NULL , saveName=NULL ) {
mcmcMat = as.matrix(codaSamples,chains=TRUE)
Ntheta = length(grep("theta",colnames(mcmcMat)))
summaryInfo = NULL
rowIdx = 0
for ( tIdx in 1:Ntheta ) {
parName = paste0("theta[",tIdx,"]")
summaryInfo = rbind( summaryInfo ,
summarizePost( mcmcMat[,parName] , compVal=compVal , ROPE=rope ) )
rowIdx = rowIdx+1
rownames(summaryInfo)[rowIdx] = parName
}
for ( t1Idx in 1:(Ntheta-1) ) {
for ( t2Idx in (t1Idx+1):Ntheta ) {
parName1 = paste0("theta[",t1Idx,"]")
parName2 = paste0("theta[",t2Idx,"]")
summaryInfo = rbind( summaryInfo ,
summarizePost( mcmcMat[,parName1]-mcmcMat[,parName2] ,
compVal=compValDiff , ROPE=ropeDiff ) )
rowIdx = rowIdx+1
rownames(summaryInfo)[rowIdx] = paste0(parName1,"-",parName2)
}
}
if ( !is.null(saveName) ) {
write.csv( summaryInfo , file=paste(saveName,"SummaryInfo.csv",sep="") )
}
show( summaryInfo )
return( summaryInfo )
}
#===============================================================================
plotMCMC = function( codaSamples , data , compVal=0.5 , rope=NULL ,
compValDiff=0.0 , ropeDiff=NULL ,
saveName=NULL , saveType="jpg" ) {
#-----------------------------------------------------------------------------
# N.B.: This function expects the data to be a data frame,
# with one component named y being a vector of integer 0,1 values,
# and one component named s being a factor of subject identifiers.
y = data$y
s = as.numeric(data$s) # converts character to consecutive integer levels
# Now plot the posterior:
mcmcMat = as.matrix(codaSamples,chains=TRUE)
chainLength = NROW( mcmcMat )
Ntheta = length(grep("theta",colnames(mcmcMat)))
openGraph(width=2.5*Ntheta,height=2.0*Ntheta)
par( mfrow=c(Ntheta,Ntheta) )
for ( t1Idx in 1:(Ntheta) ) {
for ( t2Idx in (1):Ntheta ) {
parName1 = paste0("theta[",t1Idx,"]")
parName2 = paste0("theta[",t2Idx,"]")
if ( t1Idx > t2Idx) {
# plot.new() # empty plot, advance to next
par( mar=c(3.5,3.5,1,1) , mgp=c(2.0,0.7,0) )
nToPlot = 700
ptIdx = round(seq(1,chainLength,length=nToPlot))
plot ( mcmcMat[ptIdx,parName2] , mcmcMat[ptIdx,parName1] , cex.lab=1.75 ,
xlab=parName2 , ylab=parName1 , col="skyblue" )
} else if ( t1Idx == t2Idx ) {
par( mar=c(3.5,1,1,1) , mgp=c(2.0,0.7,0) )
postInfo = plotPost( mcmcMat[,parName1] , cex.lab = 1.75 ,
compVal=compVal , ROPE=rope , cex.main=1.5 ,
xlab=parName1 , main="" )
includeRows = ( s == t1Idx ) # identify rows of this subject in data
dataPropor = sum(y[includeRows])/sum(includeRows)
points( dataPropor , 0 , pch="+" , col="red" , cex=3 )
} else if ( t1Idx < t2Idx ) {
par( mar=c(3.5,1,1,1) , mgp=c(2.0,0.7,0) )
postInfo = plotPost(mcmcMat[,parName1]-mcmcMat[,parName2] , cex.lab = 1.75 ,
compVal=compValDiff , ROPE=ropeDiff , cex.main=1.5 ,
xlab=paste0(parName1,"-",parName2) , main="" )
includeRows1 = ( s == t1Idx ) # identify rows of this subject in data
dataPropor1 = sum(y[includeRows1])/sum(includeRows1)
includeRows2 = ( s == t2Idx ) # identify rows of this subject in data
dataPropor2 = sum(y[includeRows2])/sum(includeRows2)
points( dataPropor1-dataPropor2 , 0 , pch="+" , col="red" , cex=3 )
}
}
}
#-----------------------------------------------------------------------------
if ( !is.null(saveName) ) {
saveGraph( file=paste(saveName,"Post",sep=""), type=saveType)
}
}
#===============================================================================
And this is code I used from the original source file:
# Load the data
myData = read.csv("dogcats_polliberal.csv") # myData is a data frame.
# Load the functions genMCMC, smryMCMC, and plotMCMC:
source("Jags-Ydich-XnomSsubj-MbernBeta.R")
# Generate the MCMC chain:
mcmcCoda = genMCMC( data = myData , numSavedSteps=50000 )
# Display diagnostics of chain, for specified parameter:
diagMCMC( mcmcCoda , parName="theta[1]" )
# Display numerical summary statistics of chain:
smryMCMC( mcmcCoda , compVal=NULL , compValDiff=0.0 )
# Display graphical posterior information:
plotMCMC( mcmcCoda , data=myData , compVal=NULL , compValDiff=0.0 )
The traceback error comes from the line in the original source file
jagsModel = jags.model( "TEMPmodel.txt" , data=dataList , inits=initsList ,
n.chains=nChains , n.adapt=adaptSteps )
Can someone help me figure out why I get the error:
Also, I changed the s variable to numeric in the data file but I don't show it here. Keeping it as characters also gives another error.
The problem might be this:
Text above was copied from this blog post, http://doingbayesiandataanalysis.blogspot.com/2020/09/fixing-new-problem-in-some-dbda2e.html