A way to extract the name of subarray that was sampled in R

75 views Asked by At

I have a simulation that I have constructed and need to refer to a given subarray that was randomly sampled from in order to create some plots using a for loop. I am not quite certain how to go about accomplishing this...

Here is my code:

K <- 2       # number of subarrays
N <- 100 
Hstar <- 10

perms <- 10000

p <- 0.95

specs <- 1:N 

pop <- array(dim = c(c(perms, N), K))

haps <- as.character(1:Hstar)

probs <- rep(1/Hstar, Hstar) 

for(j in 1:perms){
    for(i in 1:K){ 
        if(i == 1){
            pop[j, specs, i] <- sample(haps, size = N, replace = TRUE, prob = probs)
    }
        else{
            K1 <- c(1:5)
            K2 <- c(6:10)
            pop[j ,, 1] <- sample(haps[K1], size = N, replace = TRUE, prob = probs[K1])
            pop[j ,, 2] <- sample(haps[K2], size = N, replace = TRUE, prob = probs[K2]) 
        }
    }
}

HAC.mat <- array(dim = c(c(perms, N), K))

for(k in specs){
    for(j in 1:perms){
        for(i in 1:K){ 
            ind.index <- sample(specs, size = k, replace = FALSE)
            hap.plot <- pop[sample(1:nrow(pop), size = 1, replace = TRUE),     ind.index, sample(1:K, size = 1, replace = TRUE)]
            HAC.mat[j, k, i] <- length(unique(hap.plot))
    }
  }
}

means <- apply(HAC.mat, MARGIN = 2, mean)
lower <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.025))
upper <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.975))

par(mfrow = c(1, 2))

for(i in 1:K){
    if(i == 1){
        plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, Hstar))
        polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
        lines(specs, means, lwd = 2)
        HAC.bar <- barplot(N*probs, xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = 1:Hstar)

}
    else{
        plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, max(means)))
        polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
        lines(specs, means, lwd = 2)
        HAC.bar <- barplot(max(HAC.mat)*probs[K1], xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = min(means):ceiling(max(means)))
    }
}

The issue is in the last else statement where I indicate

max(HAC.mat)*probs[K1]

Above, I just put K1 as an illustration. However, in reality, I don't know which row of K1 or K2 was sampled. Is there a way to specify this generically? Something like K[i]?

Thanks! Please let me know if more clarification is needed.

1

There are 1 answers

6
Hack-R On BEST ANSWER

You can use get and paste0. I made your example have less permutations so it finishes faster (since it's just for testing this) and I made the probs different so that you can see that it works.

K <- 2       # number of subarrays
N <- 10 
Hstar <- 10

perms <- 1000

p <- 0.95

specs <- 1:N 

pop <- array(dim = c(c(perms, N), K))

haps <- as.character(1:Hstar)

probs <- seq(1/Hstar, 1,.1) 

for(j in 1:perms){
  for(i in 1:K){ 
    if(i == 1){
      pop[j, specs, i] <- sample(haps, size = N, replace = TRUE, prob = probs)
    }
    else{
      K1 <- c(1:5)
      K2 <- c(6:10)
      pop[j ,, 1] <- sample(haps[K1], size = N, replace = TRUE, prob = probs[K1])
      pop[j ,, 2] <- sample(haps[K2], size = N, replace = TRUE, prob = probs[K2]) 
      print(paste("j is: ", j))
    }
  }
}

HAC.mat <- array(dim = c(c(perms, N), K))

for(k in specs){
  for(j in 1:perms){
    for(i in 1:K){ 
      ind.index <- sample(specs, size = k, replace = FALSE)
      hap.plot <- pop[sample(1:nrow(pop), size = 1, replace = TRUE),     ind.index, sample(1:K, size = 1, replace = TRUE)]
      HAC.mat[j, k, i] <- length(unique(hap.plot))
    }
  }
}

means <- apply(HAC.mat, MARGIN = 2, mean)
lower <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.025))
upper <- apply(HAC.mat, MARGIN = 2, function(x) quantile(x, 0.975))

par(mfrow = c(1, 2))

for(i in 1:K){
  print(paste("i is:",i))
  print(paste("probs[...] is:", probs[get(paste0("K",i))]))
  if(i == 1){
    plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, Hstar))
    polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
    lines(specs, means, lwd = 2)
    HAC.bar <- barplot(N*probs, xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = 1:Hstar)

  }
  else{
    plot(specs, means, type = "n", xlab = "Specimens sampled", ylab = "Unique haplotypes", ylim = c(1, max(means)))
    polygon(x = c(specs, rev(specs)), y = c(lower, rev(upper)), col = "gray")
    lines(specs, means, lwd = 2)
    HAC.bar <- barplot(max(HAC.mat)*probs[get(paste0("K",i))], xlab = "Unique haplotypes", ylab = "Specimens sampled", names.arg = min(means):ceiling(max(means)))
  }
}

I added the print statements so that we can see that it works:

[1] "i is: 1"
[1] "probs[...] is: 0.1" "probs[...] is: 0.2" "probs[...] is: 0.3" "probs[...] is: 0.4" "probs[...] is: 0.5"
[1] "i is: 2"
[1] "probs[...] is: 0.6" "probs[...] is: 0.7" "probs[...] is: 0.8" "probs[...] is: 0.9" "probs[...] is: 1"