Convert normal R data frame into ragged format (a la ftable)

270 views Asked by At

In R the function ftable() creates by default a table with a so called ragged appearance:

data(UCBAdmissions)
ftable(UCBAdmissions)

...

                Dept   A   B   C   D   E   F
Admit    Gender                             
Admitted Male        512 353 120 138  53  22
         Female       89  17 202 131  94  24
Rejected Male        313 207 205 279 138 351
         Female       19   8 391 244 299 317

Rows and columns are “ragged” in the sense that labels are only displayed when they change, with the obvious convention that rows are read from top to bottom and columns are read from left to right. (https://cran.r-project.org/doc/manuals/r-devel/R-data.html#Flat-contingency-tables)

Question:

How can I get same "ragged" appearance for a normal data.frame object?

Reproducible example:

before= data.frame(C1= c(rep("A", 5), rep("L", 2)),
                   C2= c("B", rep("E", 3), rep("K", 2), "L"),
                   C3= c("C", "F", rep("H", 5)),
                   C4= c("D", "G", "I", rep("J", 4)), 
                   stringsAsFactors = FALSE)

before

...

  C1 C2 C3 C4
1  A  B  C  D
2  A  E  F  G
3  A  E  H  I
4  A  E  H  J
5  A  K  H  J
6  L  K  H  J
7  L  L  H  J

How does a function look like which converts the object before to a new object after of class data.frame, which is printed to console with print(after) as follows...

  C1 C2 C3 C4
1  A  B  C  D
2     E  F  G
3        H  I
4           J
5     K  H  J
6  L  K  H  J
7     L  H  J

If necessary, it is acceptable that the left out data are lost for this presentation format.

2

There are 2 answers

0
user2030503 On BEST ANSWER

Maybe not the most elegant solution (a. lots of for loops, b. coercing any type of column to character, c. no input assertions, d. slow, etc.), but following function rag_blank seems to basically work as requested on the example:

## Task

before= data.frame(C1= c(rep("A", 5), rep("L", 2)),
                   C2= c("B", rep("E", 3), rep("K", 2), "L"),
                   C3= c("C", "F", rep("H", 5)),
                   C4= c("D", "G", "I", rep("J", 4)), 
                   stringsAsFactors = FALSE)

before


## Solution

library(dplyr)

rag_blank= function(x, cols= seq_along(x), blank= ":"){

  # Copy input
  res= x

  # 1st step: blank trailing cells
  for(df_col in cols){
    res[, df_col]= as.character(unlist(res[, df_col]))
    x[, df_col]= as.character(unlist(x[, df_col]))
    re= rle(unlist(res[, df_col]))
    re_df= data.frame(value= re$values,
                      length= re$lengths,
                      stringsAsFactors = F) %>%
      mutate(idx_start= cumsum(length) - length + 2,
             idx_end= idx_start + length -2)
    for(re_row in 1:nrow(re_df)){
      if(re_df$idx_start[re_row]<= re_df$idx_end[re_row]){
        res[(re_df$idx_start[re_row]:re_df$idx_end[re_row]), df_col]= blank
      }
    }
  }

  # 2nd step: restore value if blank, resp. changed from 1st step but left cell it is not blank
  for(df_col in cols[-1]){
    changed_before= res[, df_col]!= x[, df_col]
    left_not_changed= res[, df_col-1]== x[, df_col-1]
    to_change= changed_before & left_not_changed
    res[to_change, df_col]= x[to_change, df_col]
  }

  res
}

rag_blank(before)

...

  C1 C2 C3 C4
1  A  B  C  D
2  :  E  F  G
3  :  :  H  I
4  :  :  :  J
5  :  K  H  J
6  L  K  H  J
7  :  L  H  J

In some cases applying blanks is not appropriate, then this may be helpful:

rag_index= function(x){
  rag_blank(x) != x
}

rag_index(before)

...

        C1    C2    C3    C4
[1,] FALSE FALSE FALSE FALSE
[2,]  TRUE FALSE FALSE FALSE
[3,]  TRUE  TRUE FALSE FALSE
[4,]  TRUE  TRUE  TRUE FALSE
[5,]  TRUE FALSE FALSE FALSE
[6,] FALSE FALSE FALSE FALSE
[7,]  TRUE FALSE FALSE FALSE

More busy example:

data("diamonds", package = "ggplot2")
print(rag_blank(x= head(diamonds, 30)), n= 100)

...

Source: local data frame [30 x 10]

   carat       cut color clarity depth table price     x     y     z
   <chr>     <chr> <chr>   <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1   0.23     Ideal     E     SI2  61.5    55   326  3.95  3.98  2.43
2   0.21   Premium     E     SI1  59.8    61   326  3.89  3.84  2.31
3   0.23      Good     E     VS1  56.9    65   327  4.05  4.07  2.31
4   0.29   Premium     I     VS2  62.4    58   334   4.2  4.23  2.63
5   0.31      Good     J     SI2  63.3    58   335  4.34  4.35  2.75
6   0.24 Very Good     J    VVS2  62.8    57   336  3.94  3.96  2.48
7      :         :     I    VVS1  62.3    57   336  3.95  3.98  2.47
8   0.26 Very Good     H     SI1  61.9    55   337  4.07  4.11  2.53
9   0.22      Fair     E     VS2  65.1    61   337  3.87  3.78  2.49
10  0.23 Very Good     H     VS1  59.4    61   338     4  4.05  2.39
11   0.3      Good     J     SI1    64    55   339  4.25  4.28  2.73
12  0.23     Ideal     J     VS1  62.8    56   340  3.93   3.9  2.46
13  0.22   Premium     F     SI1  60.4    61   342  3.88  3.84  2.33
14  0.31     Ideal     J     SI2  62.2    54   344  4.35  4.37  2.71
15   0.2   Premium     E     SI2  60.2    62   345  3.79  3.75  2.27
16  0.32   Premium     E      I1  60.9    58   345  4.38  4.42  2.68
17   0.3     Ideal     I     SI2    62    54   348  4.31  4.34  2.68
18     :      Good     J     SI1  63.4    54   351  4.23  4.29   2.7
19     :         :     :       :  63.8    56   351  4.23  4.26  2.71
20     : Very Good     J     SI1  62.7    59   351  4.21  4.27  2.66
21     :      Good     I     SI2  63.3    56   351  4.26   4.3  2.71
22  0.23 Very Good     E     VS2  63.8    55   352  3.85  3.92  2.48
23     :         :     H     VS1    61    57   353  3.94  3.96  2.41
24  0.31 Very Good     J     SI1  59.4    62   353  4.39  4.43  2.62
25     :         :     :       :  58.1    62   353  4.44  4.47  2.59
26  0.23 Very Good     G    VVS2  60.4    58   354  3.97  4.01  2.41
27  0.24   Premium     I     VS1  62.5    57   355  3.97  3.94  2.47
28   0.3 Very Good     J     VS2  62.2    57   357  4.28   4.3  2.67
29  0.23 Very Good     D     VS2  60.5    61   357  3.96  3.97   2.4
30     :         :     F     VS1  60.9    57   357  3.96  3.99  2.42

In case there are more elegant solutions, appreciate your feedback.

0
A5C1D2H2I1M1N2O1R2T1 On

Here's the set of functions that I came up for this:

# The main function
ragged <- function(indt, keys, blank = "") {
  require(data.table)
  indt <- setkeyv(as.data.table(indt), keys)
  vals <- setdiff(names(indt), keys)
  nams <- paste0(keys, "_copy")
  for (i in seq_along(nams)) {
    indt[, (nams[i]) := c(as.character(get(key(indt)[i])[1]),
                          rep(blank, .N-1)), by = eval(keys[seq(i)])]
  }
  out <- cbind(indt[, ..nams], indt[, ..vals])
  out <- setnames(out, nams, keys)[]
  ## There has to be a better way to do this than to store the original object and the resulting object
  out <- list(indt = indt[, (nams) := NULL][], out = out, keys = keys, blank = blank)
  class(out) <- c("ragged", class(out))
  out
}

# The print method
print.ragged <- function(x, ...) {
  print(x$out)
}

# Allowing for extraction
`[.ragged` <- function(inragged, ...) {
  out <- inragged$indt[...]
  out <- ragged(out, keys = intersect(inragged$keys, names(out)), blank = inragged$blank)
  out
}

It uses the data.table package and first sorts the data using setkey. In my opinion, it makes sense to sort the data if you're going to do this kind of hierarchical display.

Here are some examples with your before dataset.

# Nesting just the first two columns.
ragged(before, c("C1", "C2"))
##    C1 C2 C3 C4
## 1:  A  B  C  D
## 2:     E  F  G
## 3:        H  I
## 4:        H  J
## 5:     K  H  J
## 6:  L  K  H  J
## 7:     L  H  J

# Nesting with all the columns and inserting a marker
ragged(before, names(before), ":")
##    C1 C2 C3 C4
## 1:  A  B  C  D
## 2:  :  E  F  G
## 3:  :  :  H  I
## 4:  :  :  :  J
## 5:  :  K  H  J
## 6:  L  K  H  J
## 7:  :  L  H  J

Note that since the data are sorted before using ragged, the results from this function being used on head(diamonds, 30) will be different from your approach.

ragged(head(diamonds, 30), names(diamonds), ":")
##     carat       cut color clarity depth table price    x    y    z
##  1:   0.2   Premium     E     SI2  60.2    62   345 3.79 3.75 2.27
##  2:  0.21   Premium     E     SI1  59.8    61   326 3.89 3.84 2.31
##  3:  0.22      Fair     E     VS2  65.1    61   337 3.87 3.78 2.49
##  4:     :   Premium     F     SI1  60.4    61   342 3.88 3.84 2.33
##  5:  0.23      Good     E     VS1  56.9    65   327 4.05 4.07 2.31
##  6:     : Very Good     D     VS2  60.5    61   357 3.96 3.97  2.4
##  7:     :         :     E     VS2  63.8    55   352 3.85 3.92 2.48
##  8:     :         :     F     VS1  60.9    57   357 3.96 3.99 2.42
##  9:     :         :     G    VVS2  60.4    58   354 3.97 4.01 2.41
## 10:     :         :     H     VS1  59.4    61   338    4 4.05 2.39
## 11:     :         :     :       :    61    57   353 3.94 3.96 2.41
## 12:     :     Ideal     E     SI2  61.5    55   326 3.95 3.98 2.43
## 13:     :         :     J     VS1  62.8    56   340 3.93  3.9 2.46
## 14:  0.24 Very Good     I    VVS1  62.3    57   336 3.95 3.98 2.47
## 15:     :         :     J    VVS2  62.8    57   336 3.94 3.96 2.48
## 16:     :   Premium     I     VS1  62.5    57   355 3.97 3.94 2.47
## 17:  0.26 Very Good     H     SI1  61.9    55   337 4.07 4.11 2.53
## 18:  0.29   Premium     I     VS2  62.4    58   334  4.2 4.23 2.63
## 19:   0.3      Good     I     SI2  63.3    56   351 4.26  4.3 2.71
## 20:     :         :     J     SI1  63.4    54   351 4.23 4.29  2.7
## 21:     :         :     :       :  63.8    56   351 4.23 4.26 2.71
## 22:     :         :     :       :    64    55   339 4.25 4.28 2.73
## 23:     : Very Good     J     SI1  62.7    59   351 4.21 4.27 2.66
## 24:     :         :     :     VS2  62.2    57   357 4.28  4.3 2.67
## 25:     :     Ideal     I     SI2    62    54   348 4.31 4.34 2.68
## 26:  0.31      Good     J     SI2  63.3    58   335 4.34 4.35 2.75
## 27:     : Very Good     J     SI1  58.1    62   353 4.44 4.47 2.59
## 28:     :         :     :       :  59.4    62   353 4.39 4.43 2.62
## 29:     :     Ideal     J     SI2  62.2    54   344 4.35 4.37 2.71
## 30:  0.32   Premium     E      I1  60.9    58   345 4.38 4.42 2.68
##     carat       cut color clarity depth table price    x    y    z

The [.ragged function lets us continue to do operations on the ragged object. For example:

ragged(head(diamonds, 30), c("cut", "color"), ":")[, mean(price), .(cut, color)]
##           cut color       V1
##  1:      Fair     E 337.0000
##  2:      Good     E 327.0000
##  3:         :     I 351.0000
##  4:         :     J 344.0000
##  5: Very Good     D 357.0000
##  6:         :     E 352.0000
##  7:         :     F 357.0000
##  8:         :     G 354.0000
##  9:         :     H 342.6667
## 10:         :     I 336.0000
## 11:         :     J 350.0000
## 12:   Premium     E 338.6667
## 13:         :     F 342.0000
## 14:         :     I 344.5000
## 15:     Ideal     E 326.0000
## 16:         :     I 348.0000
## 17:         :     J 342.0000