Running ks test on multiple groups in R

Question

Running ks test on multiple groups in R

541 views Asked by PesKchan At 16 September 2022 at 10:57

This is my data frame, a subset of a larger one, shown here as an example:

# Reproducible example data: the output of dput() for the data frame `eee`.
# It has 24 rows: four interactomes with six rows each (one "observed" and
# five "rewired"), plus integer PPI and LCC columns.
# To recreate the data frame, assign the structure() call: eee <- structure(...)
dput(eee)
structure(list(interactome = c("HINT-binary", "HINT-binary", 
"HINT-binary", "HINT-binary", "HINT-binary", "HINT-binary", "HINT-comp", 
"HINT-comp", "HINT-comp", "HINT-comp", "HINT-comp", "HINT-comp", 
"InBioMap", "InBioMap", "InBioMap", "InBioMap", "InBioMap", "InBioMap", 
"Menche-2015", "Menche-2015", "Menche-2015", "Menche-2015", "Menche-2015", 
"Menche-2015"), class = c("observed", "rewired", "rewired", "rewired", 
"rewired", "rewired", "observed", "rewired", "rewired", "rewired", 
"rewired", "rewired", "observed", "rewired", "rewired", "rewired", 
"rewired", "rewired", "observed", "rewired", "rewired", "rewired", 
"rewired", "rewired"), PPI = c(844L, 609L, 591L, 593L, 590L, 
608L, 1329L, 874L, 872L, 864L, 807L, 855L, 7077L, 5049L, 5051L, 
5025L, 4975L, 5014L, 2445L, 1673L, 1652L, 1716L, 1712L, 1683L
), LCC = c(290L, 191L, 188L, 214L, 183L, 215L, 401L, 346L, 365L, 
366L, 359L, 356L, 635L, 615L, 613L, 613L, 617L, 615L, 528L, 476L, 
493L, 490L, 492L, 480L)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 
1002L, 1003L, 1004L, 1005L, 1006L, 1007L, 2003L, 2004L, 2005L, 
2006L, 2007L, 2008L, 3004L, 3005L, 3006L, 3007L, 3008L, 3009L
), class = "data.frame")

I would like to run a KS test on my different groups.

The groups in my data frame are "HINT-binary", "HINT-comp", "InBioMap", and "Menche-2015".

Here I found one solution, but I'm not sure how to modify it for my data frame.

Any suggestion or help would be really appreciated

UPDATE: this is the KS test I'm trying to replicate.

The description for the figure is given as follows:

(D) Number of protein-protein interactions (PPIs) between LC genes observed in the high-confidence human interactome (Menche et al., 2015) (dotted line) and 1000 randomized interactome networks (density), revealing significant enrichment for PPIs between LC genes relative to random expectation ($p < 10^{-3}$). (E) Size of the largest connected component (LCC) between LC genes in the high-confidence human interactome (dotted line) and 1000 randomized interactome networks (density), revealing LC genes occupy a distinct region of the human interactome ($p < 10^{-3}$). (F) LC genes are prioritized by a disease gene prediction algorithm (Ghiassian et al., 2015) ($p < 10^{-15}$, Kolmogorov–Smirnov test).

Original Q&A

There is 1 answer

**Parfait** · Accepted Answer · 2022-09-16T21:32:33+00:00

Consider combn to pass pairwise combinations of those groups into ks.test method:

# BUILD NESTED LIST OF RESULTS
# Enumerate every unordered pair of interactome groups, then run a
# two-sample KS test on PPI and on LCC for each pair.
# The ks.test() argument expressions (and the parameter name `x`) are kept
# verbatim because ks.test() records them in each result's data.name field.
ks_results <- lapply(
  combn(unique(eee$interactome), 2, simplify = FALSE),
  function(x) {
    ppi_test <- ks.test(
      eee$PPI[eee$interactome == x[1]], eee$PPI[eee$interactome == x[2]]
    )
    lcc_test <- ks.test(
      eee$LCC[eee$interactome == x[1]], eee$LCC[eee$interactome == x[2]]
    )
    list(PPI_ks_results = ppi_test, LCC_ks_results = lcc_test)
  }
)

# NAME LIST ELEMENTS
# Label each result with its group pair, e.g. "HINT-binary_HINT-comp".
# combn() enumerates the pairs in the same order as the call that built
# ks_results, so names and results line up one-to-one.
# The names are assigned back to ks_results itself: all later code looks
# elements up by name on ks_results, which would return NULL if only a
# separate copy carried the names.
ks_results <- setNames(
  ks_results,
  combn(unique(eee$interactome), 2, FUN = paste, collapse = "_")
)
# Alias kept so code that refers to the earlier variable name still works.
ks_results_names <- ks_results

Output

# REVIEW LIST AND ELEMENTS
# Print the nested structure: one element per group pair, each holding the
# PPI and LCC test results (objects of class "htest").
str(ks_results)

# List of 6
# $ HINT-binary_HINT-comp  :List of 2
# ..$ PPI_ks_results:List of 5
# .. ..$ statistic  : Named num 0.833
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value    : num 0.026
# .. ..$ alternative: chr "two-sided"
# .. ..$ method     : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name  : chr "eee$PPI[eee$interactome == x[1]] and eee$PPI[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# ..$ LCC_ks_results:List of 5
# .. ..$ statistic  : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value    : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method     : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name  : chr "eee$LCC[eee$interactome == x[1]] and eee$LCC[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# $ HINT-binary_InBioMap   :List of 2
# ..$ PPI_ks_results:List of 5
# .. ..$ statistic  : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value    : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method     : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name  : chr "eee$PPI[eee$interactome == x[1]] and eee$PPI[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# ..$ LCC_ks_results:List of 5
# .. ..$ statistic  : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value    : num 0.00496
# .. ..$ alternative: chr "two-sided"
# .. ..$ method     : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name  : chr "eee$LCC[eee$interactome == x[1]] and eee$LCC[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# $ HINT-binary_Menche-2015:List of 2
# ..$ PPI_ks_results:List of 5
# .. ..$ statistic  : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value    : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method     : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name  : chr "eee$PPI[eee$interactome == x[1]] and eee$PPI[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# ..$ LCC_ks_results:List of 5
# .. ..$ statistic  : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value    : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method     : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name  : chr "eee$LCC[eee$interactome == x[1]] and eee$LCC[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# $ HINT-comp_InBioMap     :List of 2
# ..$ PPI_ks_results:List of 5
# .. ..$ statistic  : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value    : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method     : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name  : chr "eee$PPI[eee$interactome == x[1]] and eee$PPI[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# ..$ LCC_ks_results:List of 5
# .. ..$ statistic  : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value    : num 0.00496
# .. ..$ alternative: chr "two-sided"
# .. ..$ method     : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name  : chr "eee$LCC[eee$interactome == x[1]] and eee$LCC[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# $ HINT-comp_Menche-2015  :List of 2
# ..$ PPI_ks_results:List of 5
# .. ..$ statistic  : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value    : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method     : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name  : chr "eee$PPI[eee$interactome == x[1]] and eee$PPI[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# ..$ LCC_ks_results:List of 5
# .. ..$ statistic  : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value    : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method     : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name  : chr "eee$LCC[eee$interactome == x[1]] and eee$LCC[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# $ InBioMap_Menche-2015   :List of 2
# ..$ PPI_ks_results:List of 5
# .. ..$ statistic  : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value    : num 0.00216
# .. ..$ alternative: chr "two-sided"
# .. ..$ method     : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name  : chr "eee$PPI[eee$interactome == x[1]] and eee$PPI[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"
# ..$ LCC_ks_results:List of 5
# .. ..$ statistic  : Named num 1
# .. .. ..- attr(*, "names")= chr "D"
# .. ..$ p.value    : num 0.00496
# .. ..$ alternative: chr "two-sided"
# .. ..$ method     : chr "Two-sample Kolmogorov-Smirnov test"
# .. ..$ data.name  : chr "eee$LCC[eee$interactome == x[1]] and eee$LCC[eee$interactome == x[2]]"
# .. ..- attr(*, "class")= chr "htest"

Access Individual Elements

# Extract single fields with exact-match [[ ]] indexing: the pair name
# selects the comparison, then the test, then the field of the htest object.
ks_results[["HINT-binary_HINT-comp"]][["PPI_ks_results"]][["statistic"]]
#         D 
# 0.8333333 
ks_results[["HINT-binary_HINT-comp"]][["PPI_ks_results"]][["p.value"]]
# [1] 0.02597403

Bind to Data Frame

# Collect the PPI test results into a data frame with one row per group pair.
# vapply() is used instead of sapply() so each column has a guaranteed type
# and length. It also drops the inner "D" name carried by the statistic, so
# the row names are exactly names(ks_results) rather than "<pair>.D".
data.frame(
  statistic = vapply(
    ks_results, \(x) x$PPI_ks_results$statistic, numeric(1)
  ),
  p_value = vapply(
    ks_results, \(x) x$PPI_ks_results$p.value, numeric(1)
  ),
  alternative = vapply(
    ks_results, \(x) x$PPI_ks_results$alternative, character(1)
  ),
  method = vapply(
    ks_results, \(x) x$PPI_ks_results$method, character(1)
  )
)

#                         statistic     p_value alternative                             method
# HINT-binary_HINT-comp   0.8333333 0.025974026   two-sided Two-sample Kolmogorov-Smirnov test
# HINT-binary_InBioMap    1.0000000 0.002164502   two-sided Two-sample Kolmogorov-Smirnov test
# HINT-binary_Menche-2015 1.0000000 0.002164502   two-sided Two-sample Kolmogorov-Smirnov test
# HINT-comp_InBioMap      1.0000000 0.002164502   two-sided Two-sample Kolmogorov-Smirnov test
# HINT-comp_Menche-2015   1.0000000 0.002164502   two-sided Two-sample Kolmogorov-Smirnov test
# InBioMap_Menche-2015    1.0000000 0.002164502   two-sided Two-sample Kolmogorov-Smirnov test

TechQA.

Running ks test on multiple groups in R

There is 1 answer

Related Questions in R

Related Questions in KOLMOGOROV-SMIRNOV

Popular Questions

Trending Questions