colnames() inconsistent with name in dataframe

32 views Asked by At

I have the oddest problem, it's never happened to me before and I haven't found any solution yet.

I'm working on a dataframe where several columns have similar names (S1Q1.pre / S1Q1.post, S1Q2.pre/S1Q2.post, ..., S1Qx.pre/S1Qx.post etc).

Example:

delta <- data.frame(S1Q1.pre = sample(1:7, 5, replace=TRUE), S1Q1.post = sample(1:7, 5, replace=TRUE), S1Q2.pre = sample(1:7, 5, replace=TRUE), S1Q2.post = sample(1:7, 5, replace=TRUE),
S1Q3.pre = sample(1:7, 5, replace=TRUE), S1Q3.post = sample(1:7, 5, replace=TRUE),S1Q4.pre = sample(1:7, 5, replace=TRUE), S1Q4.post = sample(1:7, 5, replace=TRUE))

For each of these pairs of variables (.pre/.post), I want to calculate a difference as follows:

S1Qx.delta = S1Qx.post - S1Qx.pre

I have a problem with the name of the new variable S1Qx.delta.

I've used this loop:

# Question prefixes "S1Q1" ... "S1Q4"; each one has a .pre and a .post column.
S1list <- paste0('S1Q',1:4)

# For each prefix: compute post minus pre into a temporary column, then rename
# that column to <prefix>.delta.
for (q in 1:length(S1list)) {
quest <- S1list[q]
# Create the placeholder column that the next statement overwrites.
delta$temp <- NA
# Select the .post and .pre columns by matching their names, then subtract.
delta$temp <- delta[which(colnames(delta) == paste0(quest,'.post'))] -
    delta[which(colnames(delta) == paste0(quest,'.pre'))]
# Rename the placeholder to its final name, e.g. "S1Q1.delta".
names(delta)[which(names(delta) == 'temp')] <- paste0(quest,'.delta')
}

It seems to work almost perfectly: I obtain the new variables S1Qx.delta as I wanted. I can retrieve these variables with delta$S1Qx.delta as usual.

But when I use names(), colnames() or head(), I get this:

names(delta$S1Q1.delta)
[1] "S1Q1.post"

Has anyone ever had this problem? Do you know how to solve it?

Note: I've tried the same loop with different syntaxes to rename delta$temp but it doesn't solve my problem. For instance I've tried:

colnames(delta)[which(colnames(delta) == 'temp')] <- paste0(quest,'.delta')

names(delta)[names(delta) == 'temp'] <- paste0(quest,'.delta') 
1

There is 1 answer

1
stefan On

The issue is that, when computing the differences, you extract the columns using [ (instead of [[). As a result you don't get vectors but 1-column data.frames instead. Hence your temp or .delta columns are 1-column data.frames too:

# Fix the RNG seed so the sampled example data is reproducible.
set.seed(123)

# Example data: four pre/post column pairs, each holding 5 values drawn with
# replacement from 1:7.
delta <- data.frame(
  S1Q1.pre = sample(1:7, 5, replace = TRUE), S1Q1.post = sample(1:7, 5, replace = TRUE), S1Q2.pre = sample(1:7, 5, replace = TRUE), S1Q2.post = sample(1:7, 5, replace = TRUE),
  S1Q3.pre = sample(1:7, 5, replace = TRUE), S1Q3.post = sample(1:7, 5, replace = TRUE), S1Q4.pre = sample(1:7, 5, replace = TRUE), S1Q4.post = sample(1:7, 5, replace = TRUE)
)

# Question prefixes "S1Q1" ... "S1Q4".
S1list <- paste0("S1Q", 1:4)

# Reproduces the problem from the question: the loop below is the original
# code, kept unchanged on purpose.
for (q in 1:length(S1list)) {
  quest <- S1list[q]
  delta$temp <- NA
  # Single-bracket `[` keeps the data.frame container, so each operand (and
  # therefore the difference) is a 1-column data.frame whose inner column is
  # still named after the .post column.
  delta$temp <- delta[which(colnames(delta) == paste0(quest, ".post"))] -
    delta[which(colnames(delta) == paste0(quest, ".pre"))]
  # Only the outer column name changes here; the nested data.frame keeps its
  # own inner name.
  names(delta)[which(names(delta) == "temp")] <- paste0(quest, ".delta")
}

str(delta)
#> 'data.frame':    5 obs. of  12 variables:
#>  $ S1Q1.pre  : int  7 7 3 6 3
#>  $ S1Q1.post : int  2 2 6 3 5
#>  $ S1Q2.pre  : int  4 6 6 1 2
#>  $ S1Q2.post : int  3 5 3 3 1
#>  $ S1Q3.pre  : int  4 1 1 5 3
#>  $ S1Q3.post : int  2 7 2 1 6
#>  $ S1Q4.pre  : int  3 4 6 1 3
#>  $ S1Q4.post : int  7 5 4 7 2
#>  $ S1Q1.delta:'data.frame':  5 obs. of  1 variable:
#>   ..$ S1Q1.post: int  -5 -5 3 -3 2
#>  $ S1Q2.delta:'data.frame':  5 obs. of  1 variable:
#>   ..$ S1Q2.post: int  -1 -1 -3 2 -1
#>  $ S1Q3.delta:'data.frame':  5 obs. of  1 variable:
#>   ..$ S1Q3.post: int  -2 6 1 -4 3
#>  $ S1Q4.delta:'data.frame':  5 obs. of  1 variable:
#>   ..$ S1Q4.post: int  4 1 -2 6 -1
class(delta$S1Q1.delta)
#> [1] "data.frame"
names(delta$S1Q1.delta)
#> [1] "S1Q1.post"

delta$S1Q1.delta
#>   S1Q1.post
#> 1        -5
#> 2        -5
#> 3         3
#> 4        -3
#> 5         2

Instead, you could use [[ to get vectors:

# Compute <prefix>.delta = <prefix>.post - <prefix>.pre for every prefix in
# S1list, storing each result as a plain vector column of `delta`.
#
# `[[` extracts a column as a vector (single-bracket `[` would return a
# 1-column data.frame). Assigning straight to the target name removes the need
# for a temporary `temp` column and a rename step, so re-running the loop
# overwrites the .delta columns instead of appending duplicates.
# seq_along() makes the loop a no-op when S1list is empty, whereas
# 1:length(S1list) would iterate over c(1, 0).
for (q in seq_along(S1list)) {
  quest <- S1list[q]
  delta[[paste0(quest, ".delta")]] <- delta[[paste0(quest, ".post")]] -
    delta[[paste0(quest, ".pre")]]
}

str(delta)
#> 'data.frame':    5 obs. of  12 variables:
#>  $ S1Q1.pre  : int  7 7 3 6 3
#>  $ S1Q1.post : int  2 2 6 3 5
#>  $ S1Q2.pre  : int  4 6 6 1 2
#>  $ S1Q2.post : int  3 5 3 3 1
#>  $ S1Q3.pre  : int  4 1 1 5 3
#>  $ S1Q3.post : int  2 7 2 1 6
#>  $ S1Q4.pre  : int  3 4 6 1 3
#>  $ S1Q4.post : int  7 5 4 7 2
#>  $ S1Q1.delta: int  -5 -5 3 -3 2
#>  $ S1Q2.delta: int  -1 -1 -3 2 -1
#>  $ S1Q3.delta: int  -2 6 1 -4 3
#>  $ S1Q4.delta: int  4 1 -2 6 -1
class(delta$S1Q1.delta)
#> [1] "integer"
names(delta$S1Q1.delta)
#> NULL

delta$S1Q1.delta
#> [1] -5 -5  3 -3  2

But overall you could achieve your desired result more easily like so:

# Vectorised alternative: select all .post columns and all .pre columns as two
# data.frames, subtract them element-wise, and store the result under the
# matching .delta column names in a single assignment.
delta[paste0(S1list, ".delta")] <- delta[paste0(S1list, ".post")] - delta[paste0(S1list, ".pre")]

delta
#>   S1Q1.pre S1Q1.post S1Q2.pre S1Q2.post S1Q3.pre S1Q3.post S1Q4.pre S1Q4.post
#> 1        7         2        4         3        4         2        3         7
#> 2        7         2        6         5        1         7        4         5
#> 3        3         6        6         3        1         2        6         4
#> 4        6         3        1         3        5         1        1         7
#> 5        3         5        2         1        3         6        3         2
#>   S1Q1.delta S1Q2.delta S1Q3.delta S1Q4.delta
#> 1         -5         -1         -2          4
#> 2         -5         -1          6          1
#> 3          3         -3          1         -2
#> 4         -3          2         -4          6
#> 5          2         -1          3         -1