Phylogenetic Diversity of parasites in biogeographic regions ERROR in lm.fit

19 views Asked by At

I have a phylogenetic tree of parasites ("plas_trees_reduced"), and I am trying to calculate the diversity of these parasites captured in 12 biogeographic regions. I am following and adapting code from Clark et al (https://doi.org/10.1111/mec.15545) from: https://figshare.com/articles/dataset/R_code_and_datasets_to_replicate_analyses_in_Clark_et_al_in_review_/11862633?file=21744279.

Calculating diversity for each genus is the goal, and the current issue is with one genus only. The matrix ("site_plas_matt") is essentially which haplotypes were captured at each region, and how many times. I'm not sure how to make my tree file accessible for being reproducible. Its a consensus tree and not huge, here is a context link:https://ctxt.io/2/AABIAa0VFg

I'm using a .tre file, and using NA's for undesired tree tips, then dropping those tips and visualizing just to make sure. this is the code I'm using to do this, and it works just fine for me.

    plasNAs <- ape::read.nexus("Parasite_Phylogenies/PlasBayesNAs.tre",
                         force.multi = TRUE)
plot(plasNAs)
plot(drop.tip(plasNAs, tip = "NA"))
plas_trees_reduced <- drop.tip(plasNAs, tip = "NA")
plot(plas_trees_reduced)

here is my matrix:

     |X1|X2|X3|X4|X5|X6|X7|X8|X9|X10|X11|X12|
Hp001| 0| 0| 0| 1| 0| 1| 0| 1| 1|  3|  6|  2|
Hp002| 0| 0| 0| 0| 0| 0| 1| 0| 0|  1|  8|  0|
Hp003| 0| 0| 0| 0| 1| 0| 0| 1| 0|  2|  1|  0|
Hp004| 0| 0| 0| 0| 0| 0| 0| 0| 0|  1|  1|  0|
Hp005| 0| 0| 0| 0| 0| 0| 0| 0| 0|  0|  1|  3|
Hp006| 0| 0| 0| 0| 0| 0| 0| 0| 0|  1|  1|  0|
Hp007| 0| 0| 0| 0| 0| 2| 0| 0| 0|  0|  2|  0|
Hp008| 1| 0| 0| 4| 0| 0| 1| 0| 0|  0|  1|  1|
Hp009| 0| 0| 0| 0| 0| 0| 0| 0| 0|  2|  0|  0|
Hp010| 0| 0| 0| 0| 0| 0| 0| 0| 0|  1|  0|  0|
Hp011| 0| 0| 0| 0| 0| 0| 0| 0| 0|  1|  0|  0|
Hp012| 0| 0| 0| 0| 0| 0| 0| 0| 0|  1|  0|  0|
Hp013| 0| 0| 0| 0| 0| 0| 0| 0| 0|  1|  0|  0|
Hp014| 0| 0| 0| 0| 0| 0| 0| 0| 0|  1|  0|  0|
Hp015| 0| 0| 0| 0| 0| 0| 0| 0| 0|  0|  1|  0|
Hp016| 0| 0| 0| 0| 0| 0| 0| 0| 0|  0|  1|  0|
Hp017| 0| 0| 0| 0| 0| 0| 0| 0| 0|  0|  1|  0|
Hp018| 0| 0| 0| 0| 0| 0| 0| 0| 0|  0|  1|  0|
Hp019| 0| 0| 0| 0| 0| 1| 0| 0| 0|  0|  0|  0|
Hp020| 0| 0| 0| 0| 0| 1| 0| 0| 0|  0|  0|  0|
Hp021| 0| 0| 0| 0| 0| 1| 0| 0| 0|  0|  0|  0|
Hp022| 1| 0| 0| 0| 0| 0| 0| 0| 0|  0|  0|  0|
Hp023| 1| 0| 0| 0| 0| 0| 0| 0| 0|  0|  0|  0|
Hp024| 1| 0| 0| 0| 0| 0| 0| 0| 0|  0|  0|  0|
Hp025| 2| 0| 0| 0| 0| 0| 0| 0| 0|  0|  0|  0|
Hp026| 0| 0| 0| 1| 0| 0| 0| 0| 0|  0|  0|  0|
Hp027| 0| 0| 0| 1| 0| 0| 0| 0| 0|  0|  0|  0|
Hp028| 0| 0| 0| 1| 0| 0| 0| 0| 0|  0|  0|  0|
Hp029| 0| 0| 0| 0| 0| 0| 0| 1| 0|  0|  0|  0|
Hp030| 0| 0| 0| 0| 0| 0| 0| 1| 0|  0|  0|  0|
Hp031| 0| 0| 0| 0| 0| 0| 0| 0| 0|  0|  0|  1|
Hp032| 0| 0| 0| 0| 0| 0| 0| 0| 0|  0|  0|  1|
Hp033| 0| 0| 0| 0| 0| 0| 0| 0| 0|  0|  0|  1|
Hp034| 0| 0| 0| 0| 0| 0| 0| 0| 0|  0|  0|  1|
Hp035| 0| 0| 0| 0| 0| 0| 0| 0| 0|  0|  1|  0|
Hp036| 0| 0| 0| 0| 0| 0| 0| 0| 0|  0|  1|  0|
Hp037| 0| 0| 0| 0| 1| 0| 0| 0| 0|  0|  0|  0|
Hp038| 0| 0| 0| 0| 1| 0| 0| 0| 0|  0|  0|  0|
Hp039| 0| 0| 0| 0| 1| 0| 0| 0| 0|  0|  0|  0|
Hp040| 0| 0| 0| 0| 0| 0| 0| 1| 0|  0|  0|  0|
Hp041| 0| 1| 0| 0| 0| 0| 0| 0| 0|  0|  0|  0|
  divs_plas<-lapply(seq_len(100), function(x) {
    cat("plas_trees_reduced",x, "out of 1")
        tree<-ape::write.tree(plas_trees_reduced[[x]],file="",
                    append=FALSE,digits=10)
        tree<-ade4::newick2phylog(tree)
        paras_names<-names(tree$leaves)
        site_plas_matt<-site_plas_matt[paras_names,]
        too_few<-which(colSums(site_plas_matt)<4)
        divs<-iNextPD::iNextPD(x=site_plas_matt[,-too_few],
                        labels=paras_names,phy=tree,q=0,datatype="abundance")
        divs<-data.frame(divs$AsyPDEst)%>%
        purrr::set_names(c("Cluster","q","Measure","Diversity"))%>%
        dplyr::filter(q=="q=1")%>%dplyr::filter(Measure=="Estimator")%>%
        dplyr::mutate(Diversity=as.vector(scale(Diversity)))%>%
        dplyr::select(-q,-Measure)

  diversities<-do.call(rbind,divs_plas)
})

My error code is:

Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) : NA/NaN/Inf in 'y' Called from: lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...)

I've checked for simple errors in my data, for instance, every haplotype is present in at least 1 region, and regions with >4 parasites should be excluded anyway.

Debug location is definitely approximate:

     function (x, y, offset = NULL, method = "qr", tol = 1e-07, singular.ok = TRUE, 
           ...) 
 {
   if (is.null(n <- nrow(x))) 
     stop("'x' must be a matrix")
   if (n == 0L) 
     stop("0 (non-NA) cases")
   p <- ncol(x)
   if (p == 0L) {
     return(list(coefficients = numeric(), residuals = y, 
                 fitted.values = 0 * y, rank = 0, df.residual = length(y)))
   }
   ny <- NCOL(y)
   if (is.matrix(y) && ny == 1) 
     y <- drop(y)
   if (!is.null(offset)) 
     y <- y - offset
   if (NROW(y) != n) 
     stop("incompatible dimensions")
   if (method != "qr") 
     warning(gettextf("method = '%s' is not supported. Using 'qr'", 
                      method), domain = NA)
   chkDots(...)
   z <- .Call(C_Cdqrls, x, y, tol, FALSE)
   if (!singular.ok && z$rank < p) 
     stop("singular fit encountered")
   coef <- z$coefficients
   pivot <- z$pivot
   r1 <- seq_len(z$rank)
   dn <- colnames(x) %||% paste0("x", 1L:p)
   nmeffects <- c(dn[pivot[r1]], rep.int("", n - z$rank))
   r2 <- if (z$rank < p) 
     (z$rank + 1L):p
   else integer()
   if (is.matrix(y)) {
     coef[r2, ] <- NA
     if (z$pivoted) 
       coef[pivot, ] <- coef
     dimnames(coef) <- list(dn, colnames(y))
     dimnames(z$effects) <- list(nmeffects, colnames(y))
   }
   else {
     coef[r2] <- NA
     if (z$pivoted) 
       coef[pivot] <- coef
     names(coef) <- dn
     names(z$effects) <- nmeffects
   }
   z$coefficients <- coef
   r1 <- y - z$residuals
   if (!is.null(offset)) 
     r1 <- r1 + offset
   if (z$pivoted) 
     colnames(z$qr) <- colnames(x)[z$pivot]
   qr <- z[c("qr", "qraux", "pivot", "tol", "rank")]
   c(z[c("coefficients", "residuals", "effects", "rank")], 
     list(fitted.values = r1, assign = attr(x, "assign"), 
          qr = structure(qr, class = "qr"), df.residual = n - 
            z$rank))
 }     

Any help is greatly appreciated.

I have tried forcing the data through as a matrix instead of a dataframe

      siteplasmatnum <- data.matrix(site_plas_matt, rownames.force = TRUE)


      divs_plas<-lapply(seq_len(100), function(x) {
    cat("plas_trees_reduced",x, "out of 1")
    tree<-ape::write.tree(plas_trees_reduced[[x]],file="",
                          append=FALSE,digits=10)
    tree<-ade4::newick2phylog(tree)
    paras_names<-names(tree$leaves)
    siteplasmatnum<-siteplasmatnum[paras_names,]
    too_few<-which(colSums(siteplasmatnum)<4)
    divs<-iNextPD::iNextPD(x=siteplasmatnum[,-too_few],
                           labels=paras_names,phy=tree,q=0,datatype="abundance")
    divs<-data.frame(divs$AsyPDEst)%>%
      purrr::set_names(c("Cluster","q","Measure","Diversity"))%>%
      dplyr::filter(q=="q=1")%>%dplyr::filter(Measure=="Estimator")%>%
      dplyr::mutate(Diversity=as.vector(scale(Diversity)))%>%
      dplyr::select(-q,-Measure)
  })
  diversities<-do.call(rbind,divs_plas)
}

This produces an error of: Error in if (class(x) == "matrix" | class(x) == "data.frame") { : the condition has length > 1 Called from: iNextPD::iNextPD(x = siteplasmatnum[, -too_few], labels = paras_names, phy = tree, q = 0, datatype = "abundance")

and debug location is as follows:

  endpoint = NULL, knots = 40, se = FALSE, conf = 0.95, nboot = 50) 
{
  datatype <- check_datatype(datatype)
  if (datatype == "incidence_freq" | datatype == "incidence") 
    stop("only support datatype=\"incidence_raw\"")
  Fun <- function(x, q) {
`    if (datatype == "abundance") {
      if (sum(x) == 0) 
        stop("Zero abundance counts in one or more sample sites")`
      x <- as.numeric(unlist(x))
      out <- iNextPD.Ind(x, labels, phy, q, size, endpoint = ifelse(is.null(endpoint), 
        2 * sum(x), endpoint), knots, se, conf, nboot)
    }
    if (datatype == "incidence_raw") {
      y <- iNEXT::as.incfreq(x)
      t <- y[1]
      y <- y[-1]
      if (t > sum(y)) {
        warning("Insufficient data to provide reliable estimators and associated s.e.")
      }
      if (sum(y) == 0) 
        stop("Zero incidence frequencies in one or more sample sites")
      out <- iNextPD.Sam(x, labels, phy, q, size, endpoint = ifelse(is.null(endpoint), 
        2 * max(x), endpoint), knots, se, conf, nboot)
    }
    out
  }
  if (class(q) != "numeric") 
    stop("invlid class of order q, q should be a postive value/vector of numeric object")
  if (min(q) < 0) {
    warning("ambigous of order q, we only compute postive q")
    q <- q[q >= 0]
  }
  if (datatype == "abundance") {
    if (class(x) == "matrix" | class(x) == "data.frame") {
      name_x <- colnames(x)
      x <- lapply(seq_len(ncol(x)), function(i) x[, i])
      names(x) <- name_x
    }
  }
  if (datatype == "abundance") {
    if (class(x) == "numeric" | class(x) == "integer" | 
      class(x) == "double") {
      out <- do.call("rbind", lapply(q, function(q) Fun(x, 
        q)))
      out[, -(1:3)] <- round(out[, -(1:3)], 3)
      index <- rbind(as.matrix(estPD(x, labels, phy, q = 0, 
        datatype, se = TRUE, conf = conf)), as.matrix(estPD(x, 
        labels, phy, q = 1, datatype, se = TRUE, conf = conf)), 
        as.matrix(estPD(x, labels, phy, q = 2, datatype, 
          se = TRUE, conf = conf)))
      rownames(index) <- c("q = 0", "q = 1", "q = 2")
    }
    else if (class(x) == "list") {
      out <- lapply(x, function(x) {
        tmp <- do.call("rbind", lapply(q, function(q) Fun(x, 
          q)))
        tmp[, -(1:3)] <- round(tmp[, -(1:3)], 3)
        tmp
      })
      arr <- array(0, dim = c(3, 5, length(x)))
      arr[1, , ] <- t(as.matrix(estPD(x, labels, phy, 
        q = 0, datatype, se = TRUE, conf = conf)))
      arr[2, , ] <- t(as.matrix(estPD(x, labels, phy, 
        q = 1, datatype, se = TRUE, conf = conf)))
      arr[3, , ] <- t(as.matrix(estPD(x, labels, phy, 
        q = 2, datatype, se = TRUE, conf = conf)))
      dimnames(arr)[[3]] <- names(x)
      dimnames(arr)[[1]] <- c("q = 0", "q = 1", "q = 2")
      dimnames(arr)[[2]] <- c("Observed", "Estimator", 
        "Est_s.e.", "95% Lower", "95% Upper")
      index <- ftable(arr, row.vars = c(3, 1))
    }
    else {
      stop("invalid class of x, x should be a object of numeric, matrix, data.frame, or list")
    }
  }
  else if (datatype == "incidence_raw") {
    if (class(x) == "matrix" | class(x) == "data.frame") {
      out <- do.call("rbind", lapply(q, function(q) Fun(x, 
        q)))
      out[, -(1:3)] <- round(out[, -(1:3)], 3)
      index <- rbind(as.matrix(estPD(x, labels, phy, q = 0, 
        datatype, se = TRUE, conf = conf)), as.matrix(estPD(x, 
        labels, phy, q = 1, datatype, se = TRUE, conf = conf)), 
        as.matrix(estPD(x, labels, phy, q = 2, datatype, 
          se = TRUE, conf = conf)))
      rownames(index) <- c("q = 0", "q = 1", "q = 2")
    }
    else if (class(x) == "list") {
      out <- lapply(x, function(x) {
        tmp <- do.call("rbind", lapply(q, function(q) Fun(x, 
          q)))
        tmp[, -(1:3)] <- round(tmp[, -(1:3)], 3)
        tmp
      })
      arr <- array(0, dim = c(3, 5, length(x)))
      arr[1, , ] <- t(as.matrix(estPD(x, labels, phy, 
        q = 0, datatype, se = TRUE, conf = conf)))
      arr[2, , ] <- t(as.matrix(estPD(x, labels, phy, 
        q = 1, datatype, se = TRUE, conf = conf)))
      arr[3, , ] <- t(as.matrix(estPD(x, labels, phy, 
        q = 2, datatype, se = TRUE, conf = conf)))
      dimnames(arr)[[3]] <- names(x)
      dimnames(arr)[[1]] <- c("q = 0", "q = 1", "q = 2")
      dimnames(arr)[[2]] <- c("Observed", "Estimator", 
        "Est_s.e.", "95% Lower", "95% Upper")
      index <- ftable(arr, row.vars = c(3, 1))
    }
  }
  info <- DataInfo(x, datatype)
  tree <- ExpandData(x, labels, phy, datatype)
  z <- list(DataInfo = info, iNextPDEst = out, AsyPDEst = index, 
    ExpandData = tree)
  class(z) <- c("iNextPD")
  return(z)
}

Then I removed column 3 as that had zero parasites, but I get the same error.

please and thank you to anyone who tries to help me tackle this problem.

0

There are 0 answers