function to compute permutation and calculate log likelihood ratio test

14 views Asked by At

Hello i am creating a function to calculate the log likelihood and then to do a permutation

fLLR <- function(outcome, sex, genotype, covariates, L0) {

   #outcome <-  data[,1]
  #sex <- data[,2]
  #genotype <- data[,3:4]
   #fit <- glm(outcome ~ x + sex + covariates$V1 + covariates$V2, family = binomial)
  #L0 <- coef(fit)

  inter <- 1  # 3 models for the random and skewed XCI
  gammaval <- seq(0,2,by = inter)
  n <- length(gammaval)
  gammaval <- c(gammaval, 3)
  LL <- numeric(n + 1)
  L <- numeric(n + 1)
  t <- cbind(rowSums(genotype), sex, covariates)
  mm <- ncol(t)

  B <- matrix(0, nrow = n + 1, ncol = mm + 1)
  pw <- matrix(0, nrow = n + 1, ncol = mm + 1)
  std <- matrix(0, nrow = n + 1, ncol = mm + 1)

  for (k in 1:n) {
   x <- rowSums(genotype) - 2
    x[rowSums(genotype) == 3] <- gammaval[k]

    model <- glm(outcome ~ x + sex + covariates, family = "binomial")

    LL[k] <- logLik(model)
    B[k, ] <- coef(model)
    pw[k, ] <- summary(model)$coefficients[,4]
    std[k, ] <- summary(model)$coefficients[,2]
  }

  x <- rowSums(genotype) - 2
  x[sex == 1] <- (rowSums(genotype[sex == 1,]) - 2) / 2

  model <- glm(outcome ~ x + sex + covariates, family = binomial)

  LL[n + 1] <- logLik(model)

  B[n + 1, ] <- coef(model)
  pw[n + 1, ] <- summary(model)$coefficients[,4]
  std[n + 1, ] <- summary(model)$coefficients[,2]

  max_LL <- max(LL)
  max_gamma <- gammaval[which(LL == max_LL)][1]

  if (max_gamma < 1) {
model <- "skewed XCI to normal allele"
    } else if (max_gamma == 1) {
      model <- "random XCI"
    } else if (max_gamma == 3) {
model <- "escape of XCI"
    } else {
      model <- "skewed XCI to risk allele"
    }

    i <- which(LL == max_LL)
    b <- B[i[1], ]
    se <- std[i[1], ]
    llr <- L0 - (-2 * max_LL)

    results <- c(b[2], se[2], llr)

    return(list(model = model, results = results))
  }



    fperm <- function(outcome, sex, genotype, covariates, N) {
    # Permutation of the case-control status within females and males respectively
    data_hold <- cbind(outcome, sex, genotype, covariates)
    m <- ncol(data_hold)

    data_hold1 <- data_hold[data_hold[, "sex"] == 1, ]
    nn1 <- nrow(data_hold1)
    outcome_hold1 <- data_hold1[, "outcome"]

    data_hold0 <- data_hold[data_hold[, "sex"] == 0, ]
    nn0 <- nrow(data_hold0)
    outcome_hold0 <- data_hold0[, "outcome"]
    #N <- 10
    results <- matrix(0, nrow = N, ncol = 3)
    for (i in 1:N) {
      #i = 1
      # Convert the numeric value to an integer within the valid range
      seed <- sum(as.numeric(Sys.time())) * 1001
      seed <- as.integer(seed %% .Machine$integer.max)

      # Set the seed
      set.seed(seed)

      outcome1 <- sample(outcome_hold1, size = nn1, replace = TRUE)
      data1 <- data_hold1
      data1[, "outcome"] <- outcome1

      outcome0 <- sample(outcome_hold0, size = nn0, replace = TRUE)
      data0 <- data_hold0
      data0[, "outcome"] <- outcome0

      data_perm <- rbind(data1, data0)
      phen <- data_perm[, "outcome"]
      gender <- data_perm[, "sex"]
      geno_type <- data_perm[, 3:4]
      covar <- data_perm[, -(1:4)]
      file <- as.data.frame(cbind(gender + covar))
      glm_model <- glm(phen ~ ., family = "binomial", data = file)
      L0 <- logLik(glm_model)
      results[i, ] <- fLLR(phen, gender, geno_type, covar, L0)
    }

    return(results)
  }

datafx is dataframe with the first column as the binary dependent variable, second column sex, third and fourth column the predictor variable

  datafx <- read.table("~/MATLAB/data.txt", quote="\"", comment.char="")
  covar <- read.table("~/MATLAB/covariates.txt", quote="\"", comment.char="")
  outcome <-  datafx[,1]
  sex <- datafx[,2]
  genotype <- datafx[,3:4]
  covariates <- covar[,1:2]
  N<- 10
  esaai <- fperm(outcome, sex, genotype, covariates[,1], covariates[,2])

the function above is not working properly i dont know why it still gives me the error below

  #Error in `[<-`(`*tmp*`, i, , value = fLLR(phen, gender, geno_type, covar,  : 
  #                                            subscript out of bounds
  #                                          In addition: Warning message:
  #                                            In 1:N : numerical expression has 2000 elements:                                                                     #only the first used     can someone help me ?

         
0

There are 0 answers