Fuzzy Join Error: All columns in a tibble must be vectors

3.1k views Asked by At
test <- structure(list(trip_count = 1:10, dropoff_longitude = c(-73.959862, 
-73.882202, -73.934113, -73.992203, -74.00563, -73.975189, -73.97448, 
-73.974838, -73.981377, -73.955093), dropoff_latitude = c(40.773617, 
40.744175, 40.715923, 40.749203, 40.726158, 40.729824, 40.763599, 
40.754135, 40.759987, 40.765224)), row.names = c(NA, -10L), class = c("data.table", 
"data.frame"), .internal.selfref = <pointer: 0x7fd18800f6e0>)
> dput(zip_codes)

zip_codes <- structure(list(zipcode = c("10001", "10002", "10003", "10004", 
"10005", "10006", "10007", "10009", "10010", "10011", "10012", 
"10013", "10014", "10016", "10017", "10018", "10019", "10020", 
"10021", "10022", "10023", "10024", "10025", "10026", "10027", 
"10028", "10029", "10030", "10031", "10032", "10033", "10034", 
"10035", "10036", "10037", "10038", "10039", "10040", "10044", 
"10065", "10069", "10075", "10103", "10110", "10111", "10112", 
"10115", "10119", "10128", "10152", "10153", "10154", "10162", 
"10165", "10167", "10168", "10169", "10170", "10171", "10172", 
"10173", "10174", "10177", "10199", "10271", "10278", "10279", 
"10280", "10282"), bounds_north = c(40.759731, 40.724136, 40.739673, 
40.709044, 40.709294, 40.71369, 40.71719, 40.734975, 40.745421, 
40.756703, 40.731706, 40.727557, 40.742873, 40.752197, 40.757912, 
40.762526, 40.773446, 40.761094, 40.775045, 40.764898, 40.783192, 
40.818099, 40.811264, 40.807546, 40.822108, 40.782213, 40.800665, 
40.824032, 40.834372, 40.850517, 40.861552, 40.87765, 40.809582, 
40.765558, 40.819569, 40.714451, 40.846615, 40.866336, 40.772955, 
40.770517, 40.781007, 40.777677, 40.761771, 40.755516, 40.759689, 
40.759899, 40.811331, 40.751522, 40.787914, 40.759059, 40.764279, 
40.758432, 40.770085, 40.752801, 40.755303, 40.752119, 40.754974, 
40.753811, 40.756556, 40.755928, 40.754783, 40.752116, 40.7556, 
40.752723, 40.708797, 40.71628, 40.713256, 40.714767, 40.719611
), bounds_south = c(40.743451, 40.708802, 40.722933, 40.683919, 
40.702879, 40.705871, 40.709806, 40.718612, 40.73231, 40.731043, 
40.719867, 40.713446, 40.72428, 40.73801, 40.747251, 40.749102, 
40.758645, 40.757284, 40.758133, 40.751445, 40.768436, 40.778805, 
40.788476, 40.79691, 40.803047, 40.770062, 40.782531, 40.812791, 
40.817221, 40.829083, 40.842958, 40.849745, 40.781075, 40.752197, 
40.806636, 40.701689, 40.817912, 40.851863, 40.749415, 40.759284, 
40.771612, 40.769441, 40.759787, 40.753481, 40.758538, 40.758436, 
40.810373, 40.749101, 40.773108, 40.757749, 40.762964, 40.757125, 
40.768355, 40.75146, 40.753994, 40.750775, 40.753811, 40.751441, 
40.755243, 40.754619, 40.753481, 40.750766, 40.754678, 40.750241, 
40.707694, 40.714082, 40.711995, 40.700273, 40.713378), bounds_east = c(-73.984076, 
-73.973635, -73.979864, -73.995657, -74.004569, -74.009988, -74.000455, 
-73.971282, -73.971566, -73.990798, -73.991794, -73.994035, -73.999555, 
-73.968192, -73.964271, -73.981822, -73.973015, -73.977201, -73.947973, 
-73.958599, -73.974067, -73.960687, -73.954966, -73.944667, -73.940404, 
-73.944337, -73.930891, -73.936232, -73.938588, -73.934671, -73.92216, 
-73.910587, -73.914228, -73.978116, -73.933219, -73.991772, -73.929107, 
-73.924385, -73.940026, -73.952085, -73.986609, -73.947039, -73.975831, 
-73.980395, -73.976744, -73.97845, -73.963058, -73.99111, -73.937328, 
-73.970993, -73.971411, -73.971451, -73.94827, -73.977677, -73.973735, 
-73.976048, -73.975209, -73.974648, -73.97282, -73.973276, -73.978332, 
-73.973959, -73.975352, -73.993948, -74.009829, -74.002115, -74.007666, 
-74.013754, -74.012441), bounds_west = c(-74.008621, -73.997532, 
-73.999604, -74.047285, -74.012508, -74.015905, -74.013754, -73.988643, 
-73.994028, -74.012359, -74.004575, -74.016381, -74.01599, -73.987746, 
-73.981822, -74.007989, -74.003477, -73.98373, -73.968441, -73.977655, 
-73.990149, -73.98814, -73.977092, -73.962475, -73.9659, -73.96323, 
-73.955778, -73.948677, -73.960007, -73.950403, -73.944672, -73.947051, 
-73.946462, -74.001702, -73.943398, -74.010542, -73.943506, -73.938947, 
-73.961583, -73.972553, -73.996142, -73.965148, -73.979513, -73.984118, 
-73.97845, -73.980886, -73.964424, -73.994844, -73.959921, -73.973068, 
-73.973465, -73.973524, -73.951858, -73.979768, -73.975807, -73.978159, 
-73.976974, -73.977107, -73.974897, -73.975352, -73.980395, -73.976048, 
-73.976516, -74.00143, -74.011248, -74.00542, -74.009668, -74.019603, 
-74.01831), zip = c(10001, 10002, 10003, 10004, 10005, 10006, 
10007, 10009, 10010, 10011, 10012, 10013, 10014, 10016, 10017, 
10018, 10019, 10020, 10021, 10022, 10023, 10024, 10025, 10026, 
10027, 10028, 10029, 10030, 10031, 10032, 10033, 10034, 10035, 
10036, 10037, 10038, 10039, 10040, 10044, 10065, 10069, 10075, 
10103, 10110, 10111, 10112, 10115, 10119, 10128, 10152, 10153, 
10154, 10162, 10165, 10167, 10168, 10169, 10170, 10171, 10172, 
10173, 10174, 10177, 10199, 10271, 10278, 10279, 10280, 10282
)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 9L, 10L, 11L, 12L, 
13L, 14L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 
27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 
40L, 43L, 50L, 51L, 53L, 67L, 74L, 75L, 76L, 79L, 83L, 91L, 101L, 
102L, 103L, 111L, 114L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 
123L, 126L, 133L, 151L, 158L, 159L, 160L, 162L), class = "data.frame")

Hey guys, so I am trying to fuzzy-join lat & lon information to get the zip code of a specific location. I tried:

test <- test %>% fuzzy_left_join(zip_codes,by = c("dropoff_longitude" = "bounds_east", "dropoff_longitude" = "bounds_west", "dropoff_latitude" = "bounds_north","dropoff_latitude" = "bounds_south"), match_fun = list('<=', '>=' , '<=', '>='))

But unfortunately, this returns the error message Error: All columns in a tibble must be vectors. x Column "col" is NULL. I don't know how to solve this. There is no column "col" in either one of the data frames. The result should give me the correspondent zip code if the dropoff_longitute is between bounds_east and bounds_west and the dropoff_latitude is between bounds_north and bounds_south. Thanks a lot in advance!

1

There are 1 answers

0
akrun On

We could use the non-equi join from data.table as one of the dataset is data.table

library(data.table)
setDT(test)[zip_codes, on = .(dropoff_longitude <= bounds_east, 
    dropoff_longitude >= bounds_west,
    dropoff_latitude <= bounds_north, 
    dropoff_latitude >= bounds_south)]