I am building a Shiny app so that non-R-users can analyze data from a two-phase survey. Currently, only R and Stata can analyze data from this type of sample design (I think). The data analysts here will not be comfortable working in any programming language, thus this app.
I am running into an error when I am asking for svymean. Below is my code, along with the dataset (using the nwtco dataset from the survey package, with a little bit of nonsense tweaking for the purposes of having variables that I will have in our survey data, namely sampling weights).
library(shiny)
library(ggplot2)
library(survey)
data(nwtco)
# process nwtco a bit to give it fake sampling weights, etc
# Use the nwtco dataset from the survival package for tutorial - Data from the National Wilm's Tumor Study
# 4028 observations
# Variables:
# seqno - id number
# instit - Histology from local institution
# histol - Histology from central lab
# stage - Disease stage
# study - Study number
# rel - Relapse indicator
# edrel - Time to relapse
# age - Age in months
# in.subcohort - Included in the subcohort for the example in the paper
# Phase 1 sampling weight
nwtco$p1wts <- 1/((4028/162930890))
nwtco$p1sampprob <- (4028/162930890)
# Phase 2
dst <- data.frame(prop.table(ftable(xtabs(~instit+stage, nwtco))))
nwtco<- merge(nwtco, dst, by=c("instit", "stage"), all=T)
nwtco$p2wts <- 1/nwtco$Freq
nwtco$p2sampprob <- nwtco$Freq
write.csv(nwtco, "nwtco.csv")
# This is the design for this two-phase data
dccs8_approx<-twophase(id=list(~seqno,~seqno),strata=list(NULL,~interaction(stage,instit)),
data=nwtco, weights=list(~p1wts,~p2wts), subset=~in.subcohort, method="approx")
# These are the kinds of estimates I want to get from this data using Shiny
svymean(~age, dccs8_approx)
confint(svymean(~age, dccs8_approx))
Here's the ui:
ui <- shinyUI(fluidPage(
navbarPage("Two-phase Survey Data Analysis Application",
# First panel - upload data and give summary
tabPanel("Upload Two-phase Survey Data",
sidebarLayout(
sidebarPanel(
#Selector for file upload
fileInput('datafile', 'Choose Two-phase Survey Data file',
accept='.csv', width='100%')
),
mainPanel(
verbatimTextOutput("desc"),
br(),
verbatimTextOutput("sum")
)
)
),
# Second panel - statistical analysis
tabPanel("Estimation",
sidebarLayout(
sidebarPanel(
h3("Please specify two-phase sample design options"),
uiOutput("title"),
uiOutput("idp1"),
uiOutput("idp2"),
uiOutput("strata1"),
uiOutput("strata2"),
uiOutput("p1wts"),
uiOutput("p2wts"),
uiOutput("inp2"),
uiOutput("esttype"),
uiOutput("title2"),
h3("Select variable for estimation"),
uiOutput("var"),
actionButton("analysis", "Analyze!")
),
mainPanel(
textOutput("regTab")
)
)
))))
Here's the server:
options(shiny.browser=TRUE)
server <- shinyServer(function(input, output) {
# First panel - load data and see summary
# This function is repsonsible for loading in the selected file
filedata <- reactive({
infile <- input$datafile
if (is.null(infile)) {
# User has not uploaded a file yet
return(NULL)
}
read.csv(infile$datapath, stringsAsFactors = T, na.strings=c(".", " ", "", "NA"))
})
# This previews the CSV data file
output$desc <- renderPrint({
str(filedata())
})
output$sum <- renderPrint({
dat <- filedata()
summary(dat)
})
# Secpnd panel - two phase analysis
# Choose estimation type
output$esttype <- renderUI({
esttype <- c("Proportion", "Mean")
selectInput("esttype", "Estimate Type", esttype )
})
# Design specifications
output$idp1 <- renderUI({
dat <- filedata()
selectInput("idp1", "Phase 1 ID", names(dat))
})
output$idp2 <- renderUI({
dat <- filedata()
selectInput("idp2", "Phase 2 ID", names(dat))
})
output$strata1 <- renderUI({
dat <- filedata()
selectInput("strata1", "First Strata Variable", names(dat)[!names(dat) %in% c(input$idp1, input$idp2)])
})
output$strata2 <- renderUI({
dat <- filedata()
selectInput("strata2", "Second Strata Variable", names(dat)[!names(dat) %in% c(input$idp1, input$idp2, input$strata1)])
})
output$p1wts <- renderUI({
dat <- filedata()
selectInput("p1wts", "Phase 1 Sampling Weights", names(dat)[!names(dat) %in% c(input$idp1, input$idp2, input$strata1, input$strata2)])
})
output$p2wts <- renderUI({
dat <- filedata()
selectInput("p2wts", "Phase 2 Sampling Weights", names(dat)[!names(dat) %in% c(input$idp1, input$idp2, input$strata1, input$strata2, input$p1wts)])
})
output$inp2 <- renderUI({
dat <- filedata()
selectInput("inp2", "Indicator for Phase 2 Selection", names(dat)[!names(dat) %in% c(input$idp1, input$idp2, input$strata1, input$strata2, input$p1wts,input$p2wts)])
})
# Select variable to estimate
output$var <- renderUI({
dat <- filedata()
selectInput("var", "Variable to Estimate", names(dat)[!names(dat) %in% c(input$idp1, input$idp2, input$strata1, input$strata2, input$p1wts,input$p2wts, input$inp2)])
})
observeEvent(input$analysis, {
dat <- filedata()
twophase <- twophase(id=list(as.formula(paste0("~",input$idp1)), as.formula(paste0("~",input$idp2))), strata=list(NULL, ~interaction(input$strata1, input$strata2)), data=dat, weights = list(as.formula(paste0("~",input$p1wts)), subset=as.formula(paste0("~",input$inp2)), method="simple")
output$regTab <- renderPrint({
if(input$esttype=="Proportion") {
ftable(svymean(as.formula(paste0("~", "as.factor(", input$var, ")")), design=twophase)*100)
ftable(confint(svymean(as.formula(paste0("~", "as.factor(", input$var, ")")), design=twophase))*100)
} else {
ftable(svymean(as.formula(paste0("~",input$var)), design=twophase))
ftable(confint(svymean(as.formula(paste0("~",input$var)), design=twophase)))
}
})
})
})
Here's the call:
shinyApp(ui = ui, server = server)
And here is the error I get:
Warning in twophase(id = list(as.formula(paste0("~", input$idp2)), as.formula(paste0("~", :
Second-stage fpc not specified and not computable
Warning: Error in table: all arguments must have the same length
Stack trace (innermost first):
73: table
72: rowSums
71: svydesign.default
70: svydesign
69: twophase
68: observeEventHandler [#82]
4: <Anonymous>
3: do.call
2: print.shiny.appobj
1: <Promise>
What in the world am I doing wrong?
Thanks for your input!
Jen
It's quite involved to get arguments evaluated inside calls inside a model formula
So, you need this as your
twophase()
call in the Shiny app: