R - Vectorized implementation of ternary function

138 views Asked by At

I have three vectors X, Y and Z of equal length n. I need to create an n x n x n array of a function f(X[i],Y[j],Z[k]). The straightforward way to do this is to sequentially loop through each element of each of the 3 vectors. However, the time required to compute the array grows exponentially with n. Is there a way to implement this using vectorized operations?

EDIT: As mentioned in the comments, I have added a simple example of what's needed.

set.seed(1)
X = rnorm(10)
Y = seq(11,20)
Z = seq(21,30)

F = array(0, dim=c( length(X),length(Y),length(Z) ) )
for (i in 1:length(X))
  for (j in 1:length(Y))
    for (k in 1:length(Z))
      F[i,j,k] = X[i] * (Y[j] + Z[k])

Thanks.

3

There are 3 answers

5
Julien Navarre On BEST ANSWER

You can use nested outer :

set.seed(1)
X = rnorm(10)
Y = seq(11,20)
Z = seq(21,30)

F = array(0, dim = c( length(X),length(Y),length(Z) ) )
for (i in 1:length(X))
  for (j in 1:length(Y))
    for (k in 1:length(Z))
      F[i,j,k] = X[i] * (Y[j] + Z[k])

F2 <- outer(X, outer(Y, Z, "+"), "*")

> identical(F, F2)
[1] TRUE

A microbenchmark including the expand.grid solution proposed by Nick K :

X = rnorm(100)
Y = seq(1:100)
Z = seq(101:200)

forLoop <- function(X, Y, Z) {
  F = array(0, dim = c( length(X),length(Y),length(Z) ) )
  for (i in 1:length(X))
    for (j in 1:length(Y))
      for (k in 1:length(Z))
        F[i,j,k] = X[i] * (Y[j] + Z[k])
  return(F)
}

nestedOuter <- function(X, Y, Z) {
  outer(X, outer(Y, Z, "+"), "*")
}

expandGrid <- function(X, Y, Z) {
  df <- expand.grid(X = X, Y = Y, Z = Z)
  G <- df$X * (df$Y + df$Z)
  dim(G) <- c(length(X), length(Y), length(Z))
  return(G)
}

library(microbenchmark)
mbm <- microbenchmark(
  forLoop = F1 <- forLoop(X, Y, Z), 
  nestedOuter = F2 <- nestedOuter(X, Y, Z), 
  expandGrid = F3 <- expandGrid(X, Y, Z), 
  times = 50L)

> mbm
Unit: milliseconds
expr         min         lq        mean      median          uq        max neval
forLoop 3261.872552 3339.37383 3458.812265 3388.721159 3524.651971 4074.40422    50
nestedOuter    3.293461    3.36810    9.874336    3.541637    5.126789   54.24087    50
expandGrid   53.907789   57.15647   85.612048   88.286431  103.516819  235.45443    50
0
Nick Kennedy On

You could use expand.grid as follows:

df <- expand.grid(X = X, Y = Y, Z = Z)
G <- df$X * (df$Y + df$Z)
dim(G) <- c(length(X), length(Y), length(Z))
all.equal(F, G)

If you had a vectorised function, this would work just as well. If not, you could use plyr::daply.

0
David Arenburg On

Here's as an additional option, a possible Rcpp implementation (in case you like your loops). I wasn't able to outperform @Juliens solution though (maybe someone can), but they are more or less have the same timing

library(Rcpp)
cppFunction('NumericVector RCPP(NumericVector X,  NumericVector Y, NumericVector Z){

             int nrow = X.size(), ncol = 3, indx = 0;
             double temp(1) ;
             NumericVector out(pow(nrow, ncol)) ;
             IntegerVector dim(ncol) ;

             for (int l = 0; l < ncol; l++){
               dim[l] = nrow;
             }             

            for (int j = 0; j < nrow; j++) {
               for (int k = 0; k < nrow; k++) {
                     temp = Y[j] + Z[k] ;
                   for (int i = 0; i < nrow; i++) {
                         out[indx] = X[i] * temp ;
                         indx += 1 ;
                   }
               }
            }

            out.attr("dim") = dim;
            return out;
}')

Validating

identical(RCPP(X, Y, Z), F)
## [1] TRUE

A quick benchmark

set.seed(123)
X = rnorm(100)
Y = 1:100
Z = 101:200

nestedOuter <- function(X, Y, Z) outer(X, outer(Y, Z, "+"), "*")

library(microbenchmark)
microbenchmark( 
  nestedOuter = nestedOuter(X, Y, Z),  
  RCPP = RCPP(X, Y, Z),
  unit = "relative",
  times = 1e4)

# Unit: relative
#        expr      min       lq     mean   median       uq       max neval
# nestedOuter 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 10000
#        RCPP 1.164254 1.141713 1.081235 1.100596 1.080133 0.7092394 10000