Assign values based on reverse combination of two columns

70 views Asked by At

Following this question, Multiple values in one tile with geom_tile and matrix plot, I'd like the tile attributes in agreement_num to be symmetrical, i.e. for the reverse pair combination, I'd like to plot the same agreement_num. I'm creating a new question since this entails finding the reverse combination of pairs, assigning the corresponding agreement_num and then inserting new rows for each combination and agreement_num. E.g. from Kazakhstan to Kyrgyzstan there are six rows and six unique agreement_num. The expected output would be to have these rows in the dataframe:

from        to          weight          agreement_num
Kyrgyzstan  Kazakhstan  1.337996e+08    51
Kyrgyzstan  Kazakhstan  1.337996e+08    176
Kyrgyzstan  Kazakhstan  1.337996e+08    58
Kyrgyzstan  Kazakhstan  1.337996e+08    224
Kyrgyzstan  Kazakhstan  1.337996e+08    133
Kyrgyzstan  Kazakhstan  1.337996e+08    135

to be able to plot the same tile attribute for the reverse country combination in the matrix plot.

I've managed to get quite far to create the data below, but cannot figure out the last steps of inserting new rows and assigning the agreement_num for each row:

# Create empty vectors to store values
oneway <- logical(nrow(plot_data))
additional_value <- list()

# Iterate over each row of the data frame
for (i in 1:nrow(plot_data)) {
  # Find indices of reverse pairs
  reverse_indices <- which(plot_data$from == plot_data$to[i] & plot_data$to == plot_data$from[i] & seq_len(nrow(plot_data)) != i)
  
  # Assign the oneway value
  oneway[i] <- length(reverse_indices) > 0
  
  # Assign additional value based on reverse combination
  if (oneway[i]) {
    # Add the additional values from the "agreement_num" column of the reverse pairs
    additional_value[[i]] <- unique(plot_data$agreement_num[reverse_indices])
  } else {
    # Assign NA if oneway is FALSE
    additional_value[[i]] <- NA
  }
}

# Add the vectors as new columns to the data frame
plot_data$oneway <- oneway
plot_data$additional_value <- additional_value

Reproducible data:

plot_data <- structure(list(from = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L), levels = c("Afghanistan", 
"Kazakhstan", "Kyrgyzstan", "Tajikistan", "Turkmenistan", "Uzbekistan"
), class = "factor"), to = structure(c(1L, 3L, 3L, 3L, 3L, 3L, 
3L, 4L, 4L, 4L, 5L, 5L, 6L, 6L, 2L, 6L), levels = c("Afghanistan", 
"Kazakhstan", "Kyrgyzstan", "Tajikistan", "Turkmenistan", "Uzbekistan"
), class = "factor"), weight = c(1291072130433.34, 480160896152.234, 
480160896152.234, 480160896152.234, 480160896152.234, 480160896152.234, 
480160896152.234, 3474907531417.02, 3474907531417.02, 3474907531417.02, 
867103764128.709, 867103764128.709, 7791981051421.92, 7791981051421.92, 
133799551.098735, 1102379004.66647), agreement_num = c(NA, 51L, 
176L, 58L, 224L, 133L, 135L, 58L, 51L, 224L, 51L, 224L, 51L, 
224L, NA, NA), com.x = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L), com.y = c(2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L)), class = "data.frame", row.names = c(NA, 
-16L))

Perhaps there is an easier way to do this, would be happy to get suggestions.

Example of expected output: 

plot_data

from        to          weight          agreement_num
Kazakhstan  Afghanistan 1.291072e+12    NA
Kazakhstan  Kyrgyzstan  4.801609e+1     51
Kazakhstan  Kyrgyzstan  4.801609e+1     176
Kazakhstan  Kyrgyzstan  4.801609e+1     58
Kazakhstan  Kyrgyzstan  4.801609e+1     224
Kazakhstan  Kyrgyzstan  4.801609e+1     133
Kazakhstan  Kyrgyzstan  4.801609e+1     135
[other rows]
Kyrgyzstan  Kazakhstan  1.337996e+08    NA

desired output

from        to          weight          agreement_num
Kazakhstan  Afghanistan 1.291072e+12    NA
Kazakhstan  Kyrgyzstan  4.801609e+1     51
Kazakhstan  Kyrgyzstan  4.801609e+1     176
Kazakhstan  Kyrgyzstan  4.801609e+1     58
Kazakhstan  Kyrgyzstan  4.801609e+1     224
Kazakhstan  Kyrgyzstan  4.801609e+1     133
Kazakhstan  Kyrgyzstan  4.801609e+1     135
[other rows]
Kyrgyzstan  Kazakhstan  1.337996e+08    51
Kyrgyzstan  Kazakhstan  1.337996e+08    176
Kyrgyzstan  Kazakhstan  1.337996e+08    58
Kyrgyzstan  Kazakhstan  1.337996e+08    224
Kyrgyzstan  Kazakhstan  1.337996e+08    133
Kyrgyzstan  Kazakhstan  1.337996e+08    135

not desired output

from        to          weight          agreement_num
Kazakhstan  Afghanistan 1.291072e+12    NA
Kazakhstan  Kyrgyzstan  4.801609e+1     51
Kazakhstan  Kyrgyzstan  4.801609e+1     176
Kazakhstan  Kyrgyzstan  4.801609e+1     58
Kazakhstan  Kyrgyzstan  4.801609e+1     224
Kazakhstan  Kyrgyzstan  4.801609e+1     133
Kazakhstan  Kyrgyzstan  4.801609e+1     135
Afghanistan Kazakhstan  1.291072e+12    NA #(don't want this row because Afghanistan    Kazakhstan is not represented in plot_data)
[other rows]
Kyrgyzstan  Kazakhstan  1.337996e+08    51  
Kyrgyzstan  Kazakhstan  1.337996e+08    176
Kyrgyzstan  Kazakhstan  1.337996e+08    58
Kyrgyzstan  Kazakhstan  1.337996e+08    224
Kyrgyzstan  Kazakhstan  1.337996e+08    133
Kyrgyzstan  Kazakhstan  1.337996e+08    135

Adding the code for plotting:


# Set agreement_num as factor

plot_data$agreement_num <- as.factor(plot_data$agreement_num)

# Allow multiple tiles

plot_data <- plot_data |>
  mutate(
    x = as.numeric(from),
    y = as.numeric(to),
    ymin = y - .5, ymax = y + .5
  ) |>
  mutate(
    n = n(),
    xmin = x + scales::rescale(row_number(),
                               from = c(1, unique(n) + 1),
                               to = .5 * c(-1, 1)
    ),
    xmax = x + scales::rescale(row_number() + 1,
                               from = c(1, unique(n) + 1),
                               to = .5 * c(-1, 1)
    ),
    .by = c(from, to)
  )


# Create color palette

par(mar=c(0,0,1,0))

coul <- brewer.pal(9, "Set3") 

# Plot
ggplot(plot_data, aes(x = from, y = to, fill = agreement_num)) +
  geom_rect(
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax)
  ) +
  scale_fill_manual(values = coul) +
  theme_bw() +
  scale_x_discrete(drop = FALSE) +
  scale_y_discrete(drop = FALSE) +
  labs(title = "Community 2") +
  theme(plot.title = element_text(size=17),
    axis.text.x = element_text(
      size = 12, angle = 270,
      hjust = 0, vjust = 0
    ),
    axis.text.y = element_text(size = 12),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    aspect.ratio = 1
  )
1

There are 1 answers

12
r2evans On BEST ANSWER

I think an NA in the agreement_num column suggests that's a pair we need to reverse and augment ...

library(dplyr)
plot_data |>
  filter(is.na(agreement_num)) |>
  select(from=to, to=from, weight) |>
  inner_join(plot_data, by = c("from", "to")) |>
  rename(from=to, to=from, weight=weight.x) |>
  select(-weight.y) |>
  bind_rows(plot_data)
#              to       from       weight agreement_num com.x com.y
# 1    Kazakhstan Kyrgyzstan 1.337996e+08            51     2     2
# 2    Kazakhstan Kyrgyzstan 1.337996e+08           176     2     2
# 3    Kazakhstan Kyrgyzstan 1.337996e+08            58     2     2
# 4    Kazakhstan Kyrgyzstan 1.337996e+08           224     2     2
# 5    Kazakhstan Kyrgyzstan 1.337996e+08           133     2     2
# 6    Kazakhstan Kyrgyzstan 1.337996e+08           135     2     2
# 7   Afghanistan Kazakhstan 1.291072e+12            NA     2     2
# 8    Kyrgyzstan Kazakhstan 4.801609e+11            51     2     2
# 9    Kyrgyzstan Kazakhstan 4.801609e+11           176     2     2
# 10   Kyrgyzstan Kazakhstan 4.801609e+11            58     2     2
# 11   Kyrgyzstan Kazakhstan 4.801609e+11           224     2     2
# 12   Kyrgyzstan Kazakhstan 4.801609e+11           133     2     2
# 13   Kyrgyzstan Kazakhstan 4.801609e+11           135     2     2
# 14   Tajikistan Kazakhstan 3.474908e+12            58     2     2
# 15   Tajikistan Kazakhstan 3.474908e+12            51     2     2
# 16   Tajikistan Kazakhstan 3.474908e+12           224     2     2
# 17 Turkmenistan Kazakhstan 8.671038e+11            51     2     2
# 18 Turkmenistan Kazakhstan 8.671038e+11           224     2     2
# 19   Uzbekistan Kazakhstan 7.791981e+12            51     2     2
# 20   Uzbekistan Kazakhstan 7.791981e+12           224     2     2
# 21   Kazakhstan Kyrgyzstan 1.337996e+08            NA     2     2
# 22   Uzbekistan Kyrgyzstan 1.102379e+09            NA     2     2

(Note that the augmented rows are first.)