Using the survey package in a Shiny app gives "Warning: Error in table: all arguments must have the same length"

404 views Asked by At

I am building a Shiny app so that non-R-users can analyze data from a two-phase survey. Currently, only R and Stata can analyze data from this type of sample design (I think). The data analysts here will not be comfortable working in any programming language, thus this app.

I am running into an error when I am asking for svymean. Below is my code, along with the dataset (using the nwtco dataset from the survey package, with a little bit of nonsense tweaking for the purposes of having variables that I will have in our survey data, namely sampling weights).

library(shiny)
library(ggplot2)
library(survey)
data(nwtco)

# process nwtco a bit to give it fake sampling weights, etc

# Use the nwtco dataset from the survival package for tutorial - Data from the National Wilm's Tumor Study
#  4028 observations
#  Variables:
           #  seqno - id number
           #  instit - Histology from local institution
           #  histol - Histology from central lab
           #  stage - Disease stage
           #  study - Study number
           #  rel - Relapse indicator
           #  edrel - Time to relapse
           #  age - Age in months
           #  in.subcohort - Included in the subcohort for the example in the paper


# Phase 1 sampling weight
nwtco$p1wts <- 1/((4028/162930890))
nwtco$p1sampprob <- (4028/162930890)

# Phase 2
dst <- data.frame(prop.table(ftable(xtabs(~instit+stage, nwtco))))
nwtco<- merge(nwtco, dst, by=c("instit", "stage"), all=T)
nwtco$p2wts <- 1/nwtco$Freq
nwtco$p2sampprob <- nwtco$Freq

write.csv(nwtco, "nwtco.csv")

# This is the design for this two-phase data
dccs8_approx<-twophase(id=list(~seqno,~seqno),strata=list(NULL,~interaction(stage,instit)),
                       data=nwtco, weights=list(~p1wts,~p2wts), subset=~in.subcohort, method="approx")

# These are the kinds of estimates I want to get from this data using Shiny
svymean(~age, dccs8_approx)
confint(svymean(~age, dccs8_approx))

Here's the ui:

ui <- shinyUI(fluidPage(

navbarPage("Two-phase Survey Data Analysis Application",


# First panel - upload data and give summary

tabPanel("Upload Two-phase Survey Data",
       sidebarLayout(
            sidebarPanel(
                #Selector for file upload
                fileInput('datafile', 'Choose Two-phase Survey Data file',
                accept='.csv', width='100%')
        ),

        mainPanel(                  
            verbatimTextOutput("desc"),
            br(),
            verbatimTextOutput("sum")
        )
    ) 
),

# Second panel - statistical analysis

tabPanel("Estimation",
    sidebarLayout(
         sidebarPanel(
            h3("Please specify two-phase sample design options"),
            uiOutput("title"),
            uiOutput("idp1"),
            uiOutput("idp2"),
            uiOutput("strata1"),
            uiOutput("strata2"),
            uiOutput("p1wts"),
            uiOutput("p2wts"),
            uiOutput("inp2"),
            uiOutput("esttype"),
            uiOutput("title2"),
            h3("Select variable for estimation"),
            uiOutput("var"),
            actionButton("analysis", "Analyze!")
         ),       
        mainPanel(
            textOutput("regTab")
        ) 
)

))))

Here's the server:

options(shiny.browser=TRUE)

server <- shinyServer(function(input, output) {


# First panel - load data and see summary
# This function is repsonsible for loading in the selected file
filedata <- reactive({
        infile <- input$datafile
        if (is.null(infile)) {
    # User has not uploaded a file yet
    return(NULL)
        }
        read.csv(infile$datapath, stringsAsFactors = T, na.strings=c(".", " ", "", "NA"))
})  

# This previews the CSV data file
output$desc <- renderPrint({
    str(filedata())
})
    output$sum <- renderPrint({
        dat <- filedata()
        summary(dat)    
    })


# Secpnd panel - two phase analysis

        # Choose estimation type    
        output$esttype <- renderUI({
            esttype <- c("Proportion", "Mean")
            selectInput("esttype", "Estimate Type",  esttype )

        })

        # Design specifications


        output$idp1 <- renderUI({
            dat <- filedata()
                selectInput("idp1", "Phase 1 ID", names(dat))
        })

        output$idp2 <- renderUI({
            dat <- filedata()
                selectInput("idp2", "Phase 2 ID", names(dat))
        })

        output$strata1 <- renderUI({
            dat <- filedata()
                selectInput("strata1", "First Strata Variable", names(dat)[!names(dat) %in% c(input$idp1, input$idp2)])
        })

        output$strata2 <- renderUI({
            dat <- filedata()
                selectInput("strata2", "Second Strata Variable", names(dat)[!names(dat) %in% c(input$idp1, input$idp2, input$strata1)])
        })

        output$p1wts <- renderUI({
            dat <- filedata()
                selectInput("p1wts", "Phase 1 Sampling Weights", names(dat)[!names(dat) %in% c(input$idp1, input$idp2, input$strata1, input$strata2)])
        })

        output$p2wts <- renderUI({
            dat <- filedata()
                selectInput("p2wts", "Phase 2 Sampling Weights", names(dat)[!names(dat) %in% c(input$idp1, input$idp2, input$strata1, input$strata2, input$p1wts)])
        })

        output$inp2 <- renderUI({
            dat <- filedata()
                selectInput("inp2", "Indicator for Phase 2 Selection", names(dat)[!names(dat) %in% c(input$idp1, input$idp2, input$strata1, input$strata2, input$p1wts,input$p2wts)])
        })

        # Select variable to estimate

        output$var <- renderUI({
            dat <- filedata()
            selectInput("var", "Variable to Estimate", names(dat)[!names(dat) %in% c(input$idp1, input$idp2, input$strata1, input$strata2, input$p1wts,input$p2wts, input$inp2)])
        })

    observeEvent(input$analysis, {

        dat <- filedata()
            twophase <- twophase(id=list(as.formula(paste0("~",input$idp1)), as.formula(paste0("~",input$idp2))), strata=list(NULL, ~interaction(input$strata1, input$strata2)), data=dat, weights = list(as.formula(paste0("~",input$p1wts)),  subset=as.formula(paste0("~",input$inp2)), method="simple")

        output$regTab <- renderPrint({

            if(input$esttype=="Proportion") {
            ftable(svymean(as.formula(paste0("~", "as.factor(", input$var, ")")), design=twophase)*100)
            ftable(confint(svymean(as.formula(paste0("~", "as.factor(", input$var, ")")), design=twophase))*100)
        } else {
            ftable(svymean(as.formula(paste0("~",input$var)), design=twophase))
            ftable(confint(svymean(as.formula(paste0("~",input$var)), design=twophase)))
        }                                       
        })
    })          
})  

Here's the call:

shinyApp(ui = ui, server = server)

And here is the error I get:

 Warning in twophase(id = list(as.formula(paste0("~", input$idp2)), as.formula(paste0("~",  :
 Second-stage fpc not specified and not computable
 Warning: Error in table: all arguments must have the same length
 Stack trace (innermost first):
    73: table
    72: rowSums
    71: svydesign.default
    70: svydesign
    69: twophase
    68: observeEventHandler [#82]
     4: <Anonymous>
     3: do.call
     2: print.shiny.appobj
     1: <Promise>

What in the world am I doing wrong?

Thanks for your input!

Jen

1

There are 1 answers

0
Thomas Lumley On

It's quite involved to get arguments evaluated inside calls inside a model formula

> ~interaction(input$strata1,input$strata2)
 ~interaction(input$strata1, input$strata2)
> bquote(~interaction(.(input$strata1),.(input$strata2)))
 ~interaction("instit", "rel")
> bquote(~interaction(.(as.name(input$strata1)),.(as.name(input$strata2))))
 ~interaction(instit, rel)

So, you need this as your twophase() call in the Shiny app:

twophase <- twophase(id=list(as.formula(paste0("~",input$idp1)), as.formula(paste0("~",input$idp2))), 
                     strata = list(NULL, eval(bquote(~interaction(.(as.name(input$strata1)),.(as.name(input$strata2)))))),
                     data=dat, weights = list(as.formula(paste0("~",input$p1wts)),as.formula(paste0("~", input$p2wts))), 
                     fpc=list(NULL, NULL), subset=as.formula(paste0("~",input$inp2)), method="simple")