Hello i am creating a function to calculate the log likelihood and then to do a permutation
fLLR <- function(outcome, sex, genotype, covariates, L0) {
#outcome <- data[,1]
#sex <- data[,2]
#genotype <- data[,3:4]
#fit <- glm(outcome ~ x + sex + covariates$V1 + covariates$V2, family = binomial)
#L0 <- coef(fit)
inter <- 1 # 3 models for the random and skewed XCI
gammaval <- seq(0,2,by = inter)
n <- length(gammaval)
gammaval <- c(gammaval, 3)
LL <- numeric(n + 1)
L <- numeric(n + 1)
t <- cbind(rowSums(genotype), sex, covariates)
mm <- ncol(t)
B <- matrix(0, nrow = n + 1, ncol = mm + 1)
pw <- matrix(0, nrow = n + 1, ncol = mm + 1)
std <- matrix(0, nrow = n + 1, ncol = mm + 1)
for (k in 1:n) {
x <- rowSums(genotype) - 2
x[rowSums(genotype) == 3] <- gammaval[k]
model <- glm(outcome ~ x + sex + covariates, family = "binomial")
LL[k] <- logLik(model)
B[k, ] <- coef(model)
pw[k, ] <- summary(model)$coefficients[,4]
std[k, ] <- summary(model)$coefficients[,2]
}
x <- rowSums(genotype) - 2
x[sex == 1] <- (rowSums(genotype[sex == 1,]) - 2) / 2
model <- glm(outcome ~ x + sex + covariates, family = binomial)
LL[n + 1] <- logLik(model)
B[n + 1, ] <- coef(model)
pw[n + 1, ] <- summary(model)$coefficients[,4]
std[n + 1, ] <- summary(model)$coefficients[,2]
max_LL <- max(LL)
max_gamma <- gammaval[which(LL == max_LL)][1]
if (max_gamma < 1) {
model <- "skewed XCI to normal allele"
} else if (max_gamma == 1) {
model <- "random XCI"
} else if (max_gamma == 3) {
model <- "escape of XCI"
} else {
model <- "skewed XCI to risk allele"
}
i <- which(LL == max_LL)
b <- B[i[1], ]
se <- std[i[1], ]
llr <- L0 - (-2 * max_LL)
results <- c(b[2], se[2], llr)
return(list(model = model, results = results))
}
fperm <- function(outcome, sex, genotype, covariates, N) {
# Permutation of the case-control status within females and males respectively
data_hold <- cbind(outcome, sex, genotype, covariates)
m <- ncol(data_hold)
data_hold1 <- data_hold[data_hold[, "sex"] == 1, ]
nn1 <- nrow(data_hold1)
outcome_hold1 <- data_hold1[, "outcome"]
data_hold0 <- data_hold[data_hold[, "sex"] == 0, ]
nn0 <- nrow(data_hold0)
outcome_hold0 <- data_hold0[, "outcome"]
#N <- 10
results <- matrix(0, nrow = N, ncol = 3)
for (i in 1:N) {
#i = 1
# Convert the numeric value to an integer within the valid range
seed <- sum(as.numeric(Sys.time())) * 1001
seed <- as.integer(seed %% .Machine$integer.max)
# Set the seed
set.seed(seed)
outcome1 <- sample(outcome_hold1, size = nn1, replace = TRUE)
data1 <- data_hold1
data1[, "outcome"] <- outcome1
outcome0 <- sample(outcome_hold0, size = nn0, replace = TRUE)
data0 <- data_hold0
data0[, "outcome"] <- outcome0
data_perm <- rbind(data1, data0)
phen <- data_perm[, "outcome"]
gender <- data_perm[, "sex"]
geno_type <- data_perm[, 3:4]
covar <- data_perm[, -(1:4)]
file <- as.data.frame(cbind(gender + covar))
glm_model <- glm(phen ~ ., family = "binomial", data = file)
L0 <- logLik(glm_model)
results[i, ] <- fLLR(phen, gender, geno_type, covar, L0)
}
return(results)
}
datafx is dataframe with the first column as the binary dependent variable, second column sex, third and fourth column the predictor variable
datafx <- read.table("~/MATLAB/data.txt", quote="\"", comment.char="")
covar <- read.table("~/MATLAB/covariates.txt", quote="\"", comment.char="")
outcome <- datafx[,1]
sex <- datafx[,2]
genotype <- datafx[,3:4]
covariates <- covar[,1:2]
N<- 10
esaai <- fperm(outcome, sex, genotype, covariates[,1], covariates[,2])
the function above is not working properly i dont know why it still gives me the error below
#Error in `[<-`(`*tmp*`, i, , value = fLLR(phen, gender, geno_type, covar, :
# subscript out of bounds
# In addition: Warning message:
# In 1:N : numerical expression has 2000 elements: #only the first used can someone help me ?