Is it possibile, with CVXR package, to optimize function that works on a data frame?

165 views Asked by At

I have a following problem.
In first step suppose I have a data frame with 3 facts (a, b, c).

library(dplyr)
library(tidyr)
library(CVXR)
library(RcppRoll)
library(purrr)

set.seed(1234)

df = tibble(
  a = c(4.88,5.03,5.11,4.77,5.04,5.05,4.94,4.95,4.94,4.91)
  ,b = c(652,600,622,706,796,689,649,609,616,942)
  ,c = c(101,95,96,105,93,86,106,90,100,91)
)

Then, I do some transformations (in example it's rolling sum but here I would like to do more sophisticated things) on columns b and c, and calculate target column (y).

df = df %>% 
  mutate(b_roll_sum = roll_sum(b, n=3, fill=NA, align="right", na.rm = TRUE),
         c_roll_sum = roll_sum(c, n=3, fill=NA, align="right", na.rm = TRUE)) %>% 
  mutate(y = (-1)*a+0.0002*b_roll_sum+0.0007*c_roll_sum+1)

# A tibble: 10 x 6
       a     b     c b_roll_sum c_roll_sum     y
   <dbl> <dbl> <dbl>      <dbl>      <dbl> <dbl>
 1  4.88   652   101         NA         NA NA   
 2  5.03   600    95         NA         NA NA   
 3  5.11   622    96       1874        292 -3.53
 4  4.77   706   105       1928        296 -3.18
 5  5.04   796    93       2124        294 -3.41
 6  5.05   689    86       2191        284 -3.41
 7  4.94   649   106       2134        285 -3.31
 8  4.95   609    90       1947        282 -3.36
 9  4.94   616   100       1874        296 -3.36
10  4.91   942    91       2167        281 -3.28

Now the goal is to relocate numbers between columns b and c:

  • keeping distribution in b and c like in the beginning (if sum of given kolumn is greater than 0)
  • keeping sum of columns b and c constant (7844)
  • b and c should both be >= 0

to maximize y.

I tried to use CVXR package for it, where I'm definig objective as my custom function of data frame and object Variable(). The code seems to run but results are wrong since solution should be to "relocate everything" to column c. Output however is the other way around.

# calculate distribution in rows to keep them like before
dist_by_rows <- df %>% map2_dfr(.x = df %>% select(b, c)
                                ,.y = df %>% select(b, c) %>% summarise_all(sum)
                                ,.f = ~(.x/.y))
names(dist_by_rows) <- paste0(names(dist_by_rows), "_rows_dist")
df <- bind_cols(df, dist_by_rows)


# A tibble: 10 x 8
       a     b     c b_roll_sum c_roll_sum     y b_rows_dist c_rows_dist
   <dbl> <dbl> <dbl>      <dbl>      <dbl> <dbl>       <dbl>       <dbl>
 1  4.88   652   101         NA         NA NA          0.116       0.132
 2  5.03   600    95         NA         NA NA          0.107       0.124
 3  5.11   622    96       1874        292 -3.53       0.110       0.125
 4  4.77   706   105       1928        296 -3.18       0.125       0.137
 5  5.04   796    93       2124        294 -3.41       0.141       0.121
 6  5.05   689    86       2191        284 -3.41       0.122       0.112
 7  4.94   649   106       2134        285 -3.31       0.115       0.138
 8  4.95   609    90       1947        282 -3.36       0.108       0.117
 9  4.94   616   100       1874        296 -3.36       0.109       0.130
10  4.91   942    91       2167        281 -3.28       0.167       0.119


# define function to optimize
funk <- function(df, vars_to_opt) {

df_new <- df %>% 
  mutate(
    new_b = value(vars_to_opt)[1],
    new_c = value(vars_to_opt)[2],
    b = new_b*b_rows_dist,
    c = new_c*c_rows_dist) %>% 
  mutate(b_roll_sum = roll_sum(b, n=3, fill=NA, align="right", na.rm = TRUE),
         c_roll_sum = roll_sum(c, n=3, fill=NA, align="right", na.rm = TRUE)) %>% 
  mutate(y = (-1)*a+0.0002*b_roll_sum+0.0007*c_roll_sum+1)

df_new %>%
  select(y) %>%
  sum(., na.rm = T)

}

# test of function on "current status"
test <- Variable(2)
value(test) <- matrix(c(6881, 963), nrow = 2) #currently sum of b and c is 6881 and 963, respectively

> funk(df, vars_to_opt = test)
[1] -26.8452

> df %>% select(y) %>% sum(na.rm = T)
[1] -26.8452


# CVXR with constraints
mix_hat <- Variable(2)
objective <- Maximize(funk(df, vars_to_opt = mix_hat))

A <- matrix(rep(1, 2), nrow = 1) 
B <- diag(1, nrow = 2)

constraint1 <- A %*% mix_hat == 7844 #sum of b and c keep like it was 7844
constraint2 <- B %*% mix_hat >= 0 #b & c non negative


problem <- Problem(objective, constraints = list(constraint1, constraint2))
result <- solve(problem, b = "GLPK")

> result$getValue(mix_hat)
     [,1]
[1,] 7844
[2,]    0
> result$value
[1] -31.71
0

There are 0 answers