I have a following problem.
In first step suppose I have a data frame with 3 facts (a, b, c).
library(dplyr)
library(tidyr)
library(CVXR)
library(RcppRoll)
library(purrr)
set.seed(1234)
df = tibble(
a = c(4.88,5.03,5.11,4.77,5.04,5.05,4.94,4.95,4.94,4.91)
,b = c(652,600,622,706,796,689,649,609,616,942)
,c = c(101,95,96,105,93,86,106,90,100,91)
)
Then, I do some transformations (in example it's rolling sum but here I would like to do more sophisticated things) on columns b and c, and calculate target column (y).
df = df %>%
mutate(b_roll_sum = roll_sum(b, n=3, fill=NA, align="right", na.rm = TRUE),
c_roll_sum = roll_sum(c, n=3, fill=NA, align="right", na.rm = TRUE)) %>%
mutate(y = (-1)*a+0.0002*b_roll_sum+0.0007*c_roll_sum+1)
# A tibble: 10 x 6
a b c b_roll_sum c_roll_sum y
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4.88 652 101 NA NA NA
2 5.03 600 95 NA NA NA
3 5.11 622 96 1874 292 -3.53
4 4.77 706 105 1928 296 -3.18
5 5.04 796 93 2124 294 -3.41
6 5.05 689 86 2191 284 -3.41
7 4.94 649 106 2134 285 -3.31
8 4.95 609 90 1947 282 -3.36
9 4.94 616 100 1874 296 -3.36
10 4.91 942 91 2167 281 -3.28
Now the goal is to relocate numbers between columns b and c:
- keeping distribution in b and c like in the beginning (if sum of given kolumn is greater than 0)
- keeping sum of columns b and c constant (7844)
- b and c should both be >= 0
to maximize y.
I tried to use CVXR package for it, where I'm definig objective as my custom function of data frame and object Variable(). The code seems to run but results are wrong since solution should be to "relocate everything" to column c. Output however is the other way around.
# calculate distribution in rows to keep them like before
dist_by_rows <- df %>% map2_dfr(.x = df %>% select(b, c)
,.y = df %>% select(b, c) %>% summarise_all(sum)
,.f = ~(.x/.y))
names(dist_by_rows) <- paste0(names(dist_by_rows), "_rows_dist")
df <- bind_cols(df, dist_by_rows)
# A tibble: 10 x 8
a b c b_roll_sum c_roll_sum y b_rows_dist c_rows_dist
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4.88 652 101 NA NA NA 0.116 0.132
2 5.03 600 95 NA NA NA 0.107 0.124
3 5.11 622 96 1874 292 -3.53 0.110 0.125
4 4.77 706 105 1928 296 -3.18 0.125 0.137
5 5.04 796 93 2124 294 -3.41 0.141 0.121
6 5.05 689 86 2191 284 -3.41 0.122 0.112
7 4.94 649 106 2134 285 -3.31 0.115 0.138
8 4.95 609 90 1947 282 -3.36 0.108 0.117
9 4.94 616 100 1874 296 -3.36 0.109 0.130
10 4.91 942 91 2167 281 -3.28 0.167 0.119
# define function to optimize
funk <- function(df, vars_to_opt) {
df_new <- df %>%
mutate(
new_b = value(vars_to_opt)[1],
new_c = value(vars_to_opt)[2],
b = new_b*b_rows_dist,
c = new_c*c_rows_dist) %>%
mutate(b_roll_sum = roll_sum(b, n=3, fill=NA, align="right", na.rm = TRUE),
c_roll_sum = roll_sum(c, n=3, fill=NA, align="right", na.rm = TRUE)) %>%
mutate(y = (-1)*a+0.0002*b_roll_sum+0.0007*c_roll_sum+1)
df_new %>%
select(y) %>%
sum(., na.rm = T)
}
# test of function on "current status"
test <- Variable(2)
value(test) <- matrix(c(6881, 963), nrow = 2) #currently sum of b and c is 6881 and 963, respectively
> funk(df, vars_to_opt = test)
[1] -26.8452
> df %>% select(y) %>% sum(na.rm = T)
[1] -26.8452
# CVXR with constraints
mix_hat <- Variable(2)
objective <- Maximize(funk(df, vars_to_opt = mix_hat))
A <- matrix(rep(1, 2), nrow = 1)
B <- diag(1, nrow = 2)
constraint1 <- A %*% mix_hat == 7844 #sum of b and c keep like it was 7844
constraint2 <- B %*% mix_hat >= 0 #b & c non negative
problem <- Problem(objective, constraints = list(constraint1, constraint2))
result <- solve(problem, b = "GLPK")
> result$getValue(mix_hat)
[,1]
[1,] 7844
[2,] 0
> result$value
[1] -31.71