Flatten list to R dataframe

393 views Asked by At

I have a nested list that I want to convert to a dataframe using R, similar to this question flatten a data frame

Here is the structure of my list

> str(rf_curves$GBP)
List of 27
 $ NA                  :'data.frame':   0 obs. of  2 variables:
  ..$ date   :Class 'Date'  int(0) 
  ..$ px_last: num(0) 
 $ BP0012M       Index :'data.frame':   5 obs. of  2 variables:
  ..$ date   : Date[1:5], format: "2018-05-21" "2018-05-22" ...
  ..$ px_last: num [1:5] 0.929 0.931 0.918 0.918 0.901
 $ BP0003M       Index :'data.frame':   5 obs. of  2 variables:
  ..$ date   : Date[1:5], format: "2018-05-21" "2018-05-22" ...
  ..$ px_last: num [1:5] 0.623 0.623 0.619 0.614 0.611
 $ BP0006M       Index :'data.frame':   5 obs. of  2 variables:
  ..$ date   : Date[1:5], format: "2018-05-21" "2018-05-22" ...
  ..$ px_last: num [1:5] 0.746 0.743 0.734 0.733 0.723
 $ NA                  :'data.frame':   0 obs. of  2 variables:
  ..$ date   :Class 'Date'  int(0) 
  ..$ px_last: num(0) 

I would like a dataframe with

  • date on the rows
  • the ticker on the columns (BP0012M and BP0003M are example of tickers)
  • cells populated with px_last.

So a sample of the dataframe would be:

date           NA   BP0012M BP0003M BOP0006M
2018-05-21          0.929   0.623   0.746
2018-05-22          0.931   0.623   0.743
2018-05-23          0.918   0.619   0.743
2018-05-24          0.918   0.614   0.733
2018-05-25          0.901   0.611   0.723

The final goal is to have a convinient way to get the risk-free curve given a particular start date. For example BP0012M is the 12 month GBP libor. I currently load data from bloomberg, using library(Rblpapi). If I can get the same data from another provider e.g. Quandl that is ok. If I can achieve this goal, without flatten the list to a dataframe, I'm fine with that solution too.


Edit: Requested output is pasted below

> dput(rf_curves$GBP)
structure(list(`NA` = structure(list(date = structure(integer(0), class = "Date"), 
    px_last = numeric(0)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = integer(0)), `BP0012M       Index` = structure(list(
    date = structure(17672:17676, class = "Date"), px_last = c(0.92894, 
    0.93081, 0.91831, 0.9182, 0.90056)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 5L)), `BP0003M       Index` = structure(list(
    date = structure(17672:17676, class = "Date"), px_last = c(0.62281, 
    0.6225, 0.619, 0.61406, 0.61067)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 5L)), `BP0006M       Index` = structure(list(
    date = structure(17672:17676, class = "Date"), px_last = c(0.7463, 
    0.74323, 0.73411, 0.73321, 0.72312)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 5L)), `NA` = structure(list(date = structure(integer(0), class = "Date"), 
    px_last = numeric(0)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = integer(0)), `NA` = structure(list(date = structure(integer(0), class = "Date"), 
    px_last = numeric(0)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = integer(0)), `NA` = structure(list(date = structure(integer(0), class = "Date"), 
    px_last = numeric(0)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = integer(0)), `NA` = structure(list(date = structure(integer(0), class = "Date"), 
    px_last = numeric(0)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = integer(0)), `NA` = structure(list(date = structure(integer(0), class = "Date"), 
    px_last = numeric(0)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = integer(0)), `BPSW30   CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17680L, 17681L), class = "Date"), px_last = c(1.758, 1.768, 
    1.715, 1.696, 1.628, 1.531, 1.56725)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 7L)), `NA` = structure(list(date = structure(integer(0), class = "Date"), 
    px_last = numeric(0)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = integer(0)), `NA` = structure(list(date = structure(integer(0), class = "Date"), 
    px_last = numeric(0)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = integer(0)), `BPSW8    CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17680L, 17681L), class = "Date"), px_last = c(1.5675, 1.5955, 
    1.5375, 1.5175, 1.4475, 1.342, 1.37775)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 7L)), `BPSW1F   CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17680L, 17681L), class = "Date"), px_last = c(0.942, 0.9575, 
    0.9225, 0.9188, 0.8864, 0.84505, 0.863)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 7L)), `BPSW9    CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17680L, 17681L), class = "Date"), px_last = c(1.6115, 1.6395, 
    1.5795, 1.5585, 1.4885, 1.381, 1.419)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 7L)), `BPSW2    CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17679L, 17680L, 17681L), class = "Date"), px_last = c(1.0335, 
    1.0508, 1.0094, 0.9988, 0.9674, 0.9674, 0.9027, 0.92975)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 8L)), `BPSW10   CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17679L, 17680L, 17681L), class = "Date"), px_last = c(1.651, 
    1.675, 1.616, 1.593, 1.52, 1.52, 1.427, 1.455)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 8L)), `NA` = structure(list(date = structure(integer(0), class = "Date"), 
    px_last = numeric(0)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = integer(0)), `BPSW3    CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17680L, 17681L), class = "Date"), px_last = c(1.1795, 1.2025, 
    1.1525, 1.1445, 1.0965, 1.01, 1.0435)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 7L)), `BPSW12   CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17680L, 17681L), class = "Date"), px_last = c(1.7105, 1.7325, 
    1.6735, 1.6495, 1.5795, 1.474, 1.5115)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 7L)), `BPSW4    CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17680L, 17681L), class = "Date"), px_last = c(1.294, 1.33, 
    1.27, 1.258, 1.202, 1.107, 1.1371)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 7L)), `BPSW15   CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17680L, 17681L), class = "Date"), px_last = c(1.7615, 1.7805, 
    1.7225, 1.6985, 1.6295, 1.525, 1.5615)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 7L)), `BPSW5    CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17680L, 17681L), class = "Date"), px_last = c(1.3855, 1.4155, 
    1.3595, 1.3465, 1.2875, 1.185, 1.21425)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 7L)), `BPSW20   CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17680L, 17681L), class = "Date"), px_last = c(1.7895, 1.8045, 
    1.7485, 1.7255, 1.6565, 1.554, 1.5895)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 7L)), `BPSW6    CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17680L, 17681L), class = "Date"), px_last = c(1.4565, 1.4855, 
    1.4285, 1.4135, 1.35725, 1.2555, 1.278)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 7L)), `BPSW25   CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17680L, 17681L), class = "Date"), px_last = c(1.778, 1.791, 
    1.737, 1.716, 1.647, 1.548, 1.5835)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 7L)), `BPSW7    CMPN Curncy` = structure(list(
    date = structure(c(17672L, 17673L, 17674L, 17675L, 17676L, 
    17680L, 17681L), class = "Date"), px_last = c(1.5155, 1.5445, 
    1.4875, 1.4695, 1.4025, 1.298, 1.331)), class = "data.frame", .Names = c("date", 
"px_last"), row.names = c(NA, 7L))), .Names = c("NA", "BP0012M       Index", 
"BP0003M       Index", "BP0006M       Index", "NA", "NA", "NA", 
"NA", "NA", "BPSW30   CMPN Curncy", "NA", "NA", "BPSW8    CMPN Curncy", 
"BPSW1F   CMPN Curncy", "BPSW9    CMPN Curncy", "BPSW2    CMPN Curncy", 
"BPSW10   CMPN Curncy", "NA", "BPSW3    CMPN Curncy", "BPSW12   CMPN Curncy", 
"BPSW4    CMPN Curncy", "BPSW15   CMPN Curncy", "BPSW5    CMPN Curncy", 
"BPSW20   CMPN Curncy", "BPSW6    CMPN Curncy", "BPSW25   CMPN Curncy", 
"BPSW7    CMPN Curncy"))
1

There are 1 answers

3
Weihuang Wong On BEST ANSWER

One approach is to row-bind the dataset, then use tidyr::spread. Suppose your list of dataframes is dat, then

library(dplyr)
library(tidyr)
out <- bind_rows(dat, .id = "ticker") %>%
  mutate(ticker = gsub("^([A-Z0-9]+).*$", "\\1", ticker)) %>%
  spread(key = ticker, value = px_last)

where the gsub cleans upticker to include only the ticker itself. The output looks like

out[, 1:6]
#         date BP0003M BP0006M BP0012M BPSW10 BPSW12
# 1 2018-05-21 0.62281 0.74630 0.92894  1.651 1.7105
# 2 2018-05-22 0.62250 0.74323 0.93081  1.675 1.7325
# 3 2018-05-23 0.61900 0.73411 0.91831  1.616 1.6735
# 4 2018-05-24 0.61406 0.73321 0.91820  1.593 1.6495
# 5 2018-05-25 0.61067 0.72312 0.90056  1.520 1.5795
# 6 2018-05-28      NA      NA      NA  1.520     NA
# 7 2018-05-29      NA      NA      NA  1.427 1.4740
# 8 2018-05-30      NA      NA      NA  1.455 1.5115