Following the tidy data standard, I have my data in long format with a key and a value column. The values for some keys are numeric, for others are characters, and so R has the entire column set as character class.
When I use filter()
to pipe only the numeric data to ggplot (data with key 'a' is numeric), and then use as.numeric()
on the value definition, it does not convert correctly - I see to just get sequence numbers instead of values. What am I doing wrong?
filter(data, measure == "a") %>%
ggplot(aes(x = as.numeric(value),
x = as.factor(subject_instance),
color = as.factor(subject_instance))) +
geom_boxplot()
What is the best way to handle mixed classes in long format (i.e. is it other than the way I'm doing it).
OR
How to I get ggplot to convert to numeric correctly?
Workable example (sample of 40 rows from larger set):
mydata <- structure(list(ResponseID = c("R_40LUIW7O8Lnj7Cd", "R_aXo4IXJ2eRTyThr",
"R_9sHFiKGtn4ZhNiJ", "R_0BMN3JynUPiB0dn", "R_9mqmDcAKzae6ko5",
"R_4T7qN9appsgbnxj", "R_5BeXW1ygKISxISV", "R_3JJY4UGvbzzDYTX",
"R_0AN81Cdgz7ncPDD", "R_aXo4IXJ2eRTyThr", "R_40LUIW7O8Lnj7Cd",
"R_8BOtUltxr8O6AeN", "R_40LUIW7O8Lnj7Cd", "R_1KUj25KpGbKOaGh",
"R_5BeXW1ygKISxISV", "R_0AN81Cdgz7ncPDD", "R_aXo4IXJ2eRTyThr",
"R_aXo4IXJ2eRTyThr", "R_0N8LUMfEP12P4Wh", "R_0wuddsG9KJhHkRn",
"R_1R3kGCm3vPWi4dL", "R_50W5K8wp8m1yOZ7", "R_0wuddsG9KJhHkRn",
"R_ctKujSc0Zr5fldz", "R_4SDzTFmolPaB8wt", "R_0Ng4gEnCnkCTuoB",
"R_0Ng4gEnCnkCTuoB", "R_eb5LkAh0nBVqc9n", "R_0vqNorszrDGN6MB",
"R_40LUIW7O8Lnj7Cd", "R_6s1Q2hFaqRLMKKF", "R_8BOtUltxr8O6AeN",
"R_4SDzTFmolPaB8wt", "R_3JJY4UGvbzzDYTX", "R_3JJY4UGvbzzDYTX",
"R_77mJUnh0OPtvCEl", "R_bxtLgQnlf4iaCWx", "R_6s1Q2hFaqRLMKKF",
"R_7X8L8LwKo6UdWgR", "R_9mqmDcAKzae6ko5"), ID = c("R_3I0G7xzqlA4lUmm",
"R_12m5J3hXrv8ObMa", "R_3PmEIrRgCUr0X3L", "R_YQuCAn43cgRMHy9",
"R_51GdFWDnxQ7zvpv", "R_x9g4FVQzeqAJG8h", "R_QmDHvIxNJUypJip",
"R_2cuyzZ8C4khOGs8", "R_3fUUNvffCN7GUrn", "R_12m5J3hXrv8ObMa",
"R_3I0G7xzqlA4lUmm", "R_xgbhYoALaqQ9TDX", "R_3I0G7xzqlA4lUmm",
"R_28I21bSyxgRcyGo", "R_QmDHvIxNJUypJip", "R_3fUUNvffCN7GUrn",
"R_12m5J3hXrv8ObMa", "R_12m5J3hXrv8ObMa", "R_9L8RxssmQOGrPAR",
"R_3iExjba1az5mpLw", "R_2wodtnGyQkaGTbX", "R_dnln2Bzdjahd3ax",
"R_3iExjba1az5mpLw", "R_29gE0fK7dB6HENJ", "R_2E0mBlZmT618zQp",
"R_3EVZt1ncuzTbVRr", "R_3EVZt1ncuzTbVRr", "R_2anUpVhXXReyZAX",
"R_1dz55WFaXZ3Lm3Y", "R_3I0G7xzqlA4lUmm", "R_vUJsBPPRxV9J6CJ",
"R_xgbhYoALaqQ9TDX", "R_2E0mBlZmT618zQp", "R_2cuyzZ8C4khOGs8",
"R_2cuyzZ8C4khOGs8", "R_3LYcR4i5YB2k0N0", "R_yL9qi0TMXHfuJK9",
"R_vUJsBPPRxV9J6CJ", "R_1DqckuFAYHkKjDg", "R_51GdFWDnxQ7zvpv"
), icon = c(".rprt", ".mddm", ".cnsl", "ord.cnsl", "sgn.alrt",
"ent.advr", "flg.lab2", "ord.lab2", ".mddm", ".mds2", "rmv.prb2",
"sch.imgn", "edt.not2", "edt.prb4", "ord.lab", "grp.lab", "src.note",
"sgn.alrt", "sgn.imgn", "sch.lab", "sch.lab", ".note", "viw.imgn",
"flg.lab2", ".mddm", "ent.prbl", "ent.vtls", "ord.med", ".hstr",
"rnw.alrt", "ent.vtls", "viw.vtls", "sch.lab2", "edt.note", "rnw.med",
"ord.prcd", "rmv.prbl", "crt.grph", "edt.prb3", "ent.prb2"),
measure = c("firstclick", "lastclick", "subject", "clickcount",
"pagesubmit", "firstclick", "subject", "lastclick", "subject",
"pagesubmit", "firstclick", "subject", "clickcount", "lastclick",
"clickcount", "firstclick", "pagesubmit", "pagesubmit", "pagesubmit",
"lastclick", "action", "clickcount", "firstclick", "clickcount",
"subject", "clickcount", "firstclick", "lastclick", "subject",
"pagesubmit", "action", "lastclick", "lastclick", "pagesubmit",
"clickcount", "firstclick", "firstclick", "action", "pagesubmit",
"subject"), value = c("2.602", "4.849", "Consult(s)", "6",
"180", "1.456", "Lab / Imaging / Diagnostic", "70.335", "Medication(s)",
"180", "1.133", "Lab / Imaging / Diagnostic", "4", "3.938",
"4", "3.003", "180", "180", "180", "20.519", "Schedule",
"4", "4.758", "4", "Medication(s)", "4", "1.706", "8.582",
"Patient history", "11.599", "Enter", "9.098", "11.897",
"180", "4", "1.728", "2.423", "Search", "180", "Problem(s)"
), file = structure(c(60L, 37L, 4L, 41L, 67L, 17L, 25L, 44L,
37L, 39L, 57L, 63L, 11L, 15L, 43L, 29L, 66L, 67L, 68L, 64L,
64L, 40L, 78L, 25L, 37L, 20L, 22L, 45L, 33L, 58L, 22L, 87L,
65L, 10L, 59L, 47L, 56L, 7L, 14L, 21L), .Label = c("alert.png",
"allergies.png", "check-order.png", "consult.png", "copy-graph.png",
"create-encounter.png", "create-graph.png", "create-note.png",
"create-report.png", "edit-note.png", "edit-note2.png", "edit-problem.png",
"edit-problem2.png", "edit-problem3.png", "edit-problem4.png",
"encounter.png", "enter-adverse.png", "enter-med.png", "enter-medadmin.png",
"enter-problem.png", "enter-problem2.png", "enter-vitals.png",
"flag-imaging.png", "flag-lab.png", "flag-lab2.png", "flag-order.png",
"followup.png", "forward-alert.png", "graph-lab.png", "graph-lab2.png",
"graph-vitals.png", "graph.png", "history.png", "imaging.png",
"lab1.png", "lab2.png", "medadmin.png", "meds1.png", "meds2.png",
"note.png", "order-consult.png", "order-imaging.png", "order-lab.png",
"order-lab2.png", "order-med.png", "order-med2.png", "order-procedure.png",
"order-procedure2.png", "order.png", "problem1.png", "problem2.png",
"procedure1.png", "procedure2.png", "refill-med.png", "refill-med2.png",
"remove-problem.png", "remove-problem2.png", "renew-alert.png",
"renew-med.png", "report.png", "schedule-consult.png", "schedule-followup.png",
"schedule-imaging.png", "schedule-lab.png", "schedule-lab2.png",
"search-note.png", "sign-alert.png", "sign-imaging.png",
"sign-lab.png", "sign-lab2.png", "sign-note.png", "sign-order.png",
"sign-report.png", "sort-alert.png", "sort-vitals.png", "view-adverse.png",
"view-history.png", "view-imaging.png", "view-lab.png", "view-lab2.png",
"view-med.png", "view-note.png", "view-order.png", "view-problem.png",
"view-problem2.png", "view-report.png", "view-vitals.png",
"vitals.png"), class = "factor"), icon_action = c("", "",
"", "order", "sign", "enter", "flag", "order", "", "", "remove",
"schedule", "edit", "edit", "order", "graph", "search", "sign",
"sign", "schedule", "schedule", "", "view", "flag", "", "enter",
"enter", "order", "", "renew", "enter", "view", "schedule",
"edit", "renew", "order", "remove", "create", "edit", "enter"
), icon_subject = c("report", "medadmin", "consult", "consult",
"alert", "adverse", "lab2", "lab2", "medadmin", "meds2",
"problem2", "imaging", "note2", "problem4", "lab", "lab",
"note", "alert", "imaging", "lab", "lab", "note", "imaging",
"lab2", "medadmin", "problem", "vitals", "med", "history",
"alert", "vitals", "vitals", "lab2", "note", "med", "procedure",
"problem", "graph", "problem3", "problem2"), instance = structure(c(2L,
8L, 7L, 26L, 80L, 49L, 78L, 24L, 11L, 7L, 83L, 24L, 77L,
43L, 26L, 67L, 73L, 38L, 31L, 74L, 27L, 12L, 26L, 87L, 15L,
31L, 53L, 42L, 2L, 53L, 88L, 57L, 47L, 62L, 54L, 37L, 40L,
78L, 32L, 33L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16",
"17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "30", "31", "32", "33", "34", "35", "36",
"37", "38", "39", "40", "41", "42", "43", "44", "45", "46",
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56",
"57", "58", "59", "60", "61", "62", "63", "64", "65", "66",
"67", "68", "69", "70", "71", "72", "73", "74", "75", "76",
"77", "78", "79", "80", "81", "82", "83", "84", "85", "86",
"87", "88"), class = "factor"), action_instance = c(NA, NA,
NA, 2L, 7L, 3L, 4L, 1L, NA, NA, 2L, 1L, 5L, 2L, 2L, 2L, 1L,
6L, 2L, 4L, 1L, NA, 2L, 4L, NA, 2L, 3L, 4L, NA, 1L, 6L, 9L,
1L, 5L, 2L, 2L, 1L, 3L, 2L, 1L), subject_instance = c(1L,
1L, 1L, 2L, 4L, 2L, 7L, 2L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 6L,
5L, 3L, 2L, 3L, 2L, 1L, 2L, 7L, 1L, 2L, 4L, 2L, 1L, 5L, 5L,
3L, 4L, 4L, 3L, 1L, 3L, 3L, 1L, 2L)), .Names = c("ResponseID",
"ID", "icon", "measure", "value", "file", "icon_action", "icon_subject",
"instance", "action_instance", "subject_instance"), class = c("tbl_df",
"data.frame"), row.names = c(NA, -40L))
filter(mydata, measure=="pagesubmit") %>% ggplot(aes(y=as.numeric(value), x=as.factor(subject_instance), color=as.factor(subject_instance))) + geom_boxplot()
Also, on an semi-related note, why doesn't this work?:
filter(icon, measure=="pagesubmit") %>% mean(value)
As mentioned by @Spacedman, you example is not reproducible/consistent.
Part 1
This works:
Part 2
If you want the mean of all
measure
equal topagesubmit
, you can do:Your initial attempt does not work because you are trying to take the
mean()
out of the whole data frame. In fact, when you do... %>% mean(value)
you get the following error message:Where
.
is the left-hand argument (mydata
filtered to retain onlymeasure
equal topagesubmit
) that%>%
pipes forward tomean()
. Following the same logic you should instead do: