I am trying to fit a mixed-effect model with a spatiotemporal correlation structure using lme {nlme}
.
The data comprises 37936 lon (x) + lat (y) grid-cells (18968 male + 18968 female) with density (kde), above/below long-term average rainfall (rainAB) and distance to water (wDist) repeated every year from 2001 to 2010 (yearF=10).
I want to model how kde is influenced by rainAB and wDist for male vs. female animals, while allowing each sexF to have its own intercept and its own slope for yearF, which in turn are all correlated over space (x+y) and time (yearF).
The model runs fine with a small dataset (n=3780) but there are Too large groups in the correlation structure
with the larger dataset (n=379360). I would appreciate any advice on how to deal with this please - See reproducible example below.
# LOAD LIBRARIES
library(nlme)
library(car)
library(dplyr)
library(ggstatsplot)
#----------------------------------------------------------------------------------------------
# SMALL DATASET
set.seed(1)
(nGrid_s = 189) #Number of 1km grid cells in study area
(nYrs = 10) #Number of years
(nGrps = 2) #Number of groups i.e. male and female
dat_s = data.frame(x=rep(seq(from=283, to=401, length.out=nGrid_s),each=nYrs),
y=rep(seq(from=7176, to=7529, length.out=nGrid_s),each=nYrs),
yearF=as.factor(rep(2001:2010, nGrid_s)),
sexF=as.factor(rep(c('male','female'), each=nGrid_s*nYrs)),
kde=runif(nGrid_s*nYrs, 0, 8),
rainAB=runif(nGrid_s*nYrs, -70, 247),
wDist=runif(nGrid_s*nYrs, 0, 14))
str(dat_s)
# LME WITH SMALL DATASET (n=3780)
lmeS = lme(kde~rainAB/sexF+wDist/sexF, #What about how rainAB affects wDist
# random=~y+x|yearF,
random=~yearF|sexF,
correlation=corSpatial(form=~x+y|sexF/yearF),
control=lmeControl(maxIter=50, msMaxIter=50, niterEM=50, opt='optim',msVerbose=TRUE),
method="REML",
data=dat_s)
summary(lmeS)
Anova(lmeS, type=c("III"))
ggcoefstats(x = lmeS, title = "Small LME mixed-effects model")
##############################################################################################
# GENERATE LARGE DATASET
set.seed(2)
(nGrid_l = 18968) #Number of 1km grid cells in study area
dat_l = data.frame(x=rep(seq(from=283, to=401, length.out=nGrid_l),each=nYrs),
y=rep(seq(from=7176, to=7529, length.out=nGrid_l),each=nYrs),
yearF=as.factor(rep(2001:2010, nGrid_l)),
sexF=as.factor(rep(c('male','female'), each=nGrid_l*nYrs)),
kde=runif(nGrid_l*nYrs, 0, 8),
rainAB=runif(nGrid_l*nYrs, -70, 247),
wDist=runif(nGrid_l*nYrs, 0, 14))
# LME WITH LARGE DATASET (n=379360)
lmeL = lme(kde~rainAB/sexF+wDist/sexF, #What about how rainAB affects wDist
# random=~y+x|yearF,
random=~yearF|sexF,
correlation=corSpatial(form=~x+y|sexF/yearF),
control=lmeControl(maxIter=50, msMaxIter=50, niterEM=50, opt='optim',msVerbose=TRUE),
method="REML",
data=dat_l)
# Error: 'sumLenSq := sum(table(groups)^2)' = 1.43914e+10 is too large.
# Too large or no groups in your correlation structure?
#-------------------------------------------------------------------------------