I'm trying to create a multi-species N-mixture model to calculate the abundance of animals using data from three repeated surveys at four different site types (with only one site surveyed for each of the four site types) and examine how site/survey-specific covariates influence each species abundance at each site type.

I've received a few errors that I think I corrected but am now stuck on the following error:

Compilation error on line 22.
Invalid subset expression for beta_cov_site_type
Index expressions on the left hand side of a relation must define a contiguous, increasing set of indices

Here is the code I've used:

# Data array for the counts of 3 species from three repeat surveys at 4 distinct site types
(obs_data <- array(
  data = c(
    c(c(0,0,0), c(5,0,0), c(0,6,0), c(0,0,11)),
    c(c(19,10,0), c(2,27,0), c(0,4,70), c(3,0,1)),
    c(c(14,0,0), c(1,13,0), c(0,0,0), c(0,0,0))
  ),
  dim = c(4, 3, 3)  # 4 sites x 3 surveys x 3 species
))

# Maximum water depth during each survey at each of the 4 site types 
(covariates_site_type <- array(
  data = c(
  20,55,140,
  120,30,55,
  120,100,35,
  45,120,100
  ),
  dim = c(4, 3)  # 4 sites x 3 surveys
))

# Number of species(3), sites(4), surveys(3), and site types(4)
(n_species <- dim(obs_data)[3])
(n_sites <- dim(obs_data)[1])
(n_surveys <- dim(obs_data)[2])
(n_site_types <- dim(covariates_site_type)[1])


model {
  for (s in 1:n_species) {
    for (i in 1:n_sites) {
      for (j in 1:n_surveys) {
        # Abundance model with covariate effect
        lambda[s, i, j] <- exp(beta0[s] + beta_species[s, i] + beta_survey[s, j] + beta_cov_site_type[s, i, j] * covariates_site_type[s, j])
        
        # Likelihood for observed counts
        y[s, i, j] ~ dpois(lambda[s, i, j])
      }
    }
  }
  
  # Priors
  for (s in 1:n_species) {
    beta0[s] ~ dnorm(0, 0.001)
    for (i in 1:n_sites) {
      beta_species[s, i] ~ dnorm(0, 0.001)
    }
    for (j in 1:n_surveys) {
      beta_survey[s, i, j] ~ dnorm(0, 0.001)
      beta_cov_site_type[s, i, j] ~ dnorm(0, 0.001)
    }
  }
  
  # Derived parameters for abundance
  for (s in 1:n_species) {
    for (i in 1:n_sites) {
      for (j in 1:n_surveys) {
        abundance[s, i, j] <- lambda[s, i, j]
      }
    }
  }
}


# Data list
(jags_data <- list(
  n_species = dim(obs_data)[3],
  n_sites = dim(obs_data)[1],
  n_surveys = dim(obs_data)[2],
  covariates_site_type = covariates_site_type,
  y = obs_data
))


inits <- function(){list(beta0 = rnorm(1), beta_species = rnorm(1),
                         beta_survey = rnorm(1), beta_cov_site_type = rnorm(1), 
                         abundance = rnorm(1),
                         N = apply(jags_data$y, 1, max) + 1)}

# Parameters to save
parameters <- c("beta0", "beta_species", "beta_survey", "beta_cov_site_type", "abundance")

# MCMC settings
ni <- 30000
nc <- 3
nb <- 10000
nt <- 2

## Fit model
(fit <- jagsUI::jags(data = jags_data, inits = inits, parameters.to.save = parameters, 
                               model.file = "modelcode.jags", DIC = TRUE,
                               n.chains = nc, n.iter = ni, n.burnin = nb, n.thin = nt,
                               parallel = TRUE))

At this point I got the following error:

Compilation error on line 22.
Unknown variable i
Either supply values for this variable with the data or define it on the left hand side of a relation.

So I attempted to define i within the data list, which is n_sites. I've tried producing arrays and matrices for i.

(site_types = c(1,2,3,4))

(site_types = matrix(c(1,2,3,4)))

(site_types = matrix(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4), nrow=4, ncol=3, byrow=TRUE))


(jags_data <- list(
  n_species = dim(obs_data)[3],
  n_sites = dim(obs_data)[1],
  n_surveys = dim(obs_data)[2],
  covariates_site_type = covariates_site_type,
  i = site_types,
  y = obs_data
))

But in the first two cases I get the same error message:

Compilation error on line 9.
Index out of range taking subset of y

And then when I try and create a 4x3 matrix using

(site_types = matrix(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4), nrow=4, ncol=3, byrow=TRUE))

I get this error:

Compilation error on line 22.
Invalid subset expression for beta_cov_site_type
Index expressions on the left hand side of a relation must define a contiguous, increasing set of indices

I've tried switching the order of s, i, j based on their size but that didn't seem to do anything. I am beyond the extent of my jags coding and troubleshooting abilities.

1

There are 1 answers

0
gregor-fausto On

I think the majority of the issues here stem from how the data and model are indexed. I do some troubleshooting below but it might be useful to pair that with looking over a recap of arrays in R (e.g., https://colinfay.me/intro-to-r/arrays-and-matrices.html) and section 2.6 of the JAGS user manual on node arrays (download here https://sourceforge.net/projects/mcmc-jags/files/Manuals/4.x/).

  1. For the first error, “Compilation error on line 22./Unknown variable i”, the issue comes from the block in the code where that defines the priors. In the original code, the section defining the priors involves nested for loops and is currently written as:
for(s in 1:n_species){
…
for(i in 1:n_sites){
..
}
for(j in 1:n_surveys){
…
}
}
}

The reason that i is undefined here is that the loop for j contains variables that are indexed by i. The for loop with j just needs to be nested within the for loop with i.

  1. The variable beta_survey is indexed as beta_survey[s,j] in the likelihood block but as beta_survey[s,i,j]. I don’t think you need to index this by sites; it should just be beta_survey[s, j] ~ dnorm(0, 0.001) in the prior. I’ve rewritten the prior block so that beta0 is defined in the loop for species; beta_species is defined in the loop for sites, nested in species; beta_cov_site_type is defined in the loop for surveys, nested in sites, nested in species; beta_surveys is defined in the loop for surveys, nested in species.

  2. I think the issues that arise when trying to reformat the indices have less to do with JAGS and more with the organization of the arrays for the data and model. Briefly, the indexing in the data array and the model do not align, which is what’s causing problems. I’ll explain below.

As noted in the comment for the data array, the data are organized in the following way: 4 sites x 3 surveys x 3 species. They’re stored in a 3 dimensional array, with sites in the rows (position 1), surveys in the columns (position 2), and species in the ‘slices’ (position 3). For example, the count for site 1, survey 2, and species 3 would be accessed by obs_data[1,2,3]. In the model code, the observed counts are represented as y[s, i, j], with s species, i sites, and j surveys. The count for site 1, survey 2, and species 3, is thus y[3, 1, 2]. This is not what’s in the data array, which is causing the issue.

You could fix this either by reformatting the data or the code. I personally prefer reformatting the code so that’s what I’ll do here. I just rewrote the likelihood as

y[i, j, s] ~ dpois(lambda[s, i, j])

Doing this leaves all the other parts of the model untouched. I think this will run but for legibility (yourself, others), it might be preferable to align the indices in the data and the model.

  1. Finally, I commented out the initial values in order to get the model to run (inits in the jags call). The dimensions of the initial values need to match those of the parameters in the model. beta0 should be a length 3 vector; beta_species a 3x4 matrix, beta_survey a 3x3 matrix, beta_cov_site_type a 3x4x3 matrix (or whatever the organization of these ends up being if you reformat the data or rewrite the indices in the model). I recommend leaving out the initial values to start and then once you have a better grasp of the indexing issues, add those back in.

Here’s the rewritten script that runs for me. No guarantees that it is producing sensible estimates, just that it runs without errors now.

# Data array for the counts of 3 species from three repeat surveys at 4 distinct site types
(obs_data <- array(
  data = c(
    c(c(0,0,0), c(5,0,0), c(0,6,0), c(0,0,11)),
    c(c(19,10,0), c(2,27,0), c(0,4,70), c(3,0,1)),
    c(c(14,0,0), c(1,13,0), c(0,0,0), c(0,0,0))
  ),
  dim = c(4, 3, 3)  # 4 sites x 3 surveys x 3 species
))

# Maximum water depth during each survey at each of the 4 site types 
(covariates_site_type <- array(
  data = c(
    20,55,140,
    120,30,55,
    120,100,35,
    45,120,100
  ),
  dim = c(4, 3)  # 4 sites x 3 surveys
))

## commented this out; not necessary to define these here
# Number of species(3), sites(4), surveys(3), and site types(4)
# (n_species <- dim(obs_data)[3])
# (n_sites <- dim(obs_data)[1])
# (n_surveys <- dim(obs_data)[2])
# (n_site_types <- dim(covariates_site_type)[1])

## this creates a separate model file; file path may need to be adjusted
sink("modelcode.R")
cat("model {
  for (s in 1:n_species) {
    for (i in 1:n_sites) {
      for (j in 1:n_surveys) {
        # Abundance model with covariate effect
        lambda[s, i, j] <- exp(beta0[s] + beta_species[s, i] + beta_survey[s, j] + beta_cov_site_type[s, i, j] * covariates_site_type[s, j])
        
        # Likelihood for observed counts
        y[i, j, s] ~ dpois(lambda[s, i, j])
      }
    }
  }
  
  # Priors
    for (s in 1:n_species) {
    beta0[s] ~ dnorm(0, 0.001)
    for (i in 1:n_sites) {
      beta_species[s, i] ~ dnorm(0, 0.001)
      for(j in 1:n_surveys) {
        beta_cov_site_type[s, i, j] ~ dnorm(0, 0.001)
      }
    }
    for (j in 1:n_surveys) {
      beta_survey[s, j] ~ dnorm(0, 0.001)
    }
  }
  
  # Derived parameters for abundance
  for (s in 1:n_species) {
    for (i in 1:n_sites) {
      for (j in 1:n_surveys) {
        abundance[s, i, j] <- lambda[s, i, j]
      }
    }
  }
}",fill = TRUE)
sink()

# Data list
(jags_data <- list(
  n_species = dim(obs_data)[3],
  n_sites = dim(obs_data)[1],
  n_surveys = dim(obs_data)[2],
  covariates_site_type = covariates_site_type,
  y = obs_data
))


inits <- function(){list(beta0 = rnorm(1), beta_species = rnorm(1),
                         beta_survey = rnorm(1), beta_cov_site_type = rnorm(1), 
                         abundance = rnorm(1),
                         N = apply(jags_data$y, 1, max) + 1)}

# Parameters to save
parameters <- c("beta0", "beta_species", "beta_survey", "beta_cov_site_type", "abundance")

# MCMC settings
ni <- 30000
nc <- 3
nb <- 10000
nt <- 2

## Fit model
(fit <- jagsUI::jags(data = jags_data, 
                     #inits = inits, 
                     parameters.to.save = parameters, 
                     model.file = "modelcode.R", DIC = TRUE,
                     n.chains = nc, n.iter = ni, n.burnin = nb, n.thin = nt,
                     parallel = TRUE))