Recreating Electoral Targeting Package

84 views Asked by At

I'm really into politics and elections and I'm just getting into learning R and I would like to recreate the process laid out in this blog with new data from a local county. I've been able to get through most of the process in the blog reliably with my modified code until I get to district analyze.

datas <- district.analyze(data)

The author analyzes specific house districts whereas I would prefer to analyze the county as a whole. I modified the code to use U.S. House as my target district because it encompasses the entire county.

I want to know if anyone has suggestions on why I am unable to get the precinct level summaries from this county data. I get an error like this one:

> Error in aggregate.data.frame(as.data.frame(x), ...) : 
no rows to aggregate
In addition: Warning message:
In min(adf[, "rep_turnout_pct"], na.rm = TRUE) :

I only get this error when I have "NA" in the data. When I have "0" in place of the blanks the district.analyze works, however the "0" throws off all of the equations.

the minimum amount of code I can get to reproduce this is:

library(plyr)

 major.party.bias <- function(adf) {    

 # aggregate base partisan vote -  lowest non-zero turnout by party, given any election

 abpv_rep <- min(adf[adf$rep_turnout_pct,"rep_turnout_pct"],na.rm=TRUE)
abpv_dem <- min(adf[adf$dem_turnout_pct,"dem_turnout_pct"],na.rm=TRUE)

 # aggregate base partisan is combination of major parties worst scores
 base_abpv = abpv_rep + abpv_dem
 # swing is what is left after the aggregate base partisan support is removed
 abpv_swing = 1.0 - base_abpv

 # remove elections w/ no contender ie NA rep or NA dem turnout
 tsa <- adf[which(!is.na(adf$dem_turnout) & !is.na(adf$rep_turnout)),]  
 # add a abs difference of rep v dem column
 tsa[,"spread"] <- abs(tsa$dem_turnout_pct - tsa$rep_turnout_pct)

 # average party performance - average of the top 3 best matched races (sorted by abs(rep-dem) performance)
 app_dem <- mean(tsa[order(tsa$spread)[1:3],]$dem_turnout_pct)
 app_rep <- mean(tsa[order(tsa$spread)[1:3],]$rep_turnout_pct)

 # aggreage soft partisan vote - difference between the average worst over each year and the absolute worst (aggregate base partisan vote)
 tsa <- adf[which(!is.na(adf$rep_turnout)),]
 abpv_rep_soft <- mean(aggregate(tsa$rep_turnout_pct,tsa["year"],min)[,"x"]) - abpv_rep
 tsa <- adf[which(!is.na(adf$dem_turnout)),]
 abpv_dem_soft <- mean(aggregate(tsa$dem_turnout_pct,tsa["year"],min)[,"x"]) - abpv_dem

 # tossup is everything left after we take out base and soft support for both major parties
 abpv_tossup = abs(1.0 - abpv_rep_soft - abpv_rep - abpv_dem_soft - abpv_dem)

 partisan.rep <- abpv_rep + abpv_rep_soft
 partisan.dem <- abpv_dem + abpv_dem_soft

 return (data.frame(partisan.base=base_abpv,partisan.swing=abpv_swing,tossup=abpv_tossup,
                    app.rep=app_rep,base.rep=abpv_rep,soft.rep=abpv_rep_soft,app.dem=app_dem,base.dem=abpv_dem,soft.dem=abpv_dem_soft,
                    partisan.rep=partisan.rep, partisan.dem=partisan.dem)) 
}


 project.turnout <- function(adf,years=c(2012,2014,2016),target.district.type="U.S. House",similar.district.types=c('U.S. Senate','State Senate', 'State Auditor', 'Governor'),top.ballot.district.type="U.S. Senate") {
 # look for good elections in years
 case.type = 0
 gl <- adf[which(adf$year %in% years & adf$district_type == target.district.type & !is.na(adf$dem_turnout) & !is.na(adf$rep_turnout)),] 
 # case 1 - major parties ran in 2001,2005 (governor + lt governor + HD)
 # we'll calculate the average_turnout x downballot_turnout
 proj.turnout <- 0.0
 if(nrow(gl) >= 2 ){
     down.ballot.turnout <- mean((gl$dem_turnout + gl$rep_turnout) / gl$total_registration)     
     gl <- adf[which(adf$year %in% years & adf$district_type == top.ballot.district.type),]             
     top.ticket.turnout <- mean(gl$total_turnout / gl$total_registration)
     gl <- adf[which(adf$year %in% years & !is.na(adf$dem_turnout) & !is.na(adf$rep_turnout)),]             
     avg.turnout <- mean((gl$dem_turnout + gl$rep_turnout) / gl$total_registration) 
     runoff <- down.ballot.turnout / top.ticket.turnout
     proj.turnout <- runoff * avg.turnout
     case.type = 1
 }  
 # case 2 - missing major party candidate in ''years'', so we 'll just take the average of what we've got walking backwards from the last known good year
 # need more than one HD election
 else {     
     gl <- adf[which(adf$district_type == target.district.type & !is.na(adf$dem_turnout) & !is.na(adf$rep_turnout)),]   
     if(nrow(gl) >= 1 ) {
         # calculate the average turnout of at least one election
        proj.turnout <- mean((gl$dem_turnout + gl$rep_turnout) / gl$total_registration)                     
         case.type = 2
     }
     else {
         # we dont have any evenly matched house races so we'll look at ''similar.district.types'' as a substitute
         gl <- adf[which((adf$district_type %in% similar.district.types) & !is.na(adf$dem_turnout) & !is.na(adf$rep_turnout)),] 
         if(nrow(gl) >= 1) {
             proj.turnout <- mean((gl$dem_turnout + gl$rep_turnout) / gl$total_registration)                                        
             case.type = 3
         }
         else {
             proj.turnout <- 0          
             case.type = 4
         }
     }
 }
 # project the actual registration based on the known last registration in the df
 reg <- adf[1,]$last_registration
 proj.turnout.count <- proj.turnout  * reg return(data.frame(proj.turnout.percent=proj.turnout,proj.turnout.count=proj.turnout.count,current.reg=reg,case.type=case.type))
}

  # apply the major party bias to the projected turnout 
apply.turnout <- function(adf) {
   # take proj.turnout.count (from project.turnout) and combine it 
 with partisan percentages from major.party.bias


  adf$proj.turnout.dem <- floor(adf$proj.turnout.count * adf$app.dem)
  adf$proj.turnout.rep <- floor(adf$proj.turnout.count * adf$app.rep)
  adf$votes.to.win <- floor(adf$proj.turnout.count/2)+1

 return(adf)
 }


 district.analyze <- function(dis) {
ret <- ddply(dis, .(precinct_name), function(x) merge(project.turnout(x),major.party.bias(x)))
ret <- apply.turnout(ret)
return(ret)
 }

My data is a large dataset I read into R from .csv:

## Data given as Google Sheets
library(gsheet)
url <-"https://drive.google.com/file/d/1E4P0rfDVWEepbGHwX58qNSWN5vWd3iQU/view?usp=sharing"
df <- gsheet2tbl(url)
1

There are 1 answers

3
Kim On

Because URLs tend to rot, it's better if, as @Steady pointed out, make a minimal reproducible example. I'll just work with what you've given us though.

I first import your code from GitHub:

## Matt's code from GitHub
library(RCurl)
script <-
  getURL(
    "https://raw.githubusercontent.com/mwtxw2/R-Aggpol-Boone-Test/master/R%20Data",
    ssl.verifypeer = FALSE
  )
eval(parse(text = script))

Then I read in the data you gave in the Google Sheets

## Data given as Google Sheets
library(gsheet)
url <-
  "https://docs.google.com/spreadsheets/d/1HJjLDFEiLixQZLeMtliXyojsPtWh6XM9AZjJbc-96tA/edit?usp=sharing"
df <- gsheet2tbl(url)

Now, the problem was, I believe, that there are no district numbers if U.S. Senate subset is called---they are NA values.

## There are no district numbers if subsetted to U.S. Senate.
table((df %>% dplyr::filter(district_type == "U.S. Senate"))$district_number)
sum(!is.na((df %>% dplyr::filter(district_type == "U.S. Senate"))$district_number))

## Summary function edited
historical.turnout.summary <- function(adf,
                                       district.type = "U.S. Senate",
                                       district.number = NULL,
                                       years = c(2012, 2014, 2016)) {
    s <-
      adf[which(
        adf$district_type == district.type &
          # adf$district_number == district.number &
          adf$year %in% years
      ), ]
    if (!is.null(district.number)) {
      s <- 
        s[which(s$district_number == district.number)]
    }
    df <- ddply(
      s, "year",
      function(x) {
        year <- x$year[1]
        total.turnout <- sum(x$total_turnout)
        total.registration <- sum(x$total_registration)
        return(
          data.frame(
            year = year,
            total.turnout = total.turnout,
            total.registration = total.registration
          )
        )
      }
    )
    return(df)
  }

## Re-run
historical.turnout.summary(
  df, 
  district.type = "U.S. Senate", 
  years = c(2012, 2014, 2016) 
)

But I'm not 100% sure because you might have read the data off your local file, and while the data uploaded/downloaded from Google Sheets, blanks may have turned to NAs.

With the district.analyze, there is an error when you call major.party.bias because you try to do numerical operations on expressions such as "21.39%" and "62.81%". They will have to be parsed and turned to numeric.

Let me know if this wasn't what you were going for.