# Set the default CRAN mirror recorded in the session's `repos` option.
options(repos = c(CRAN = "https://cloud.r-project.org"))
Mini Project #03: Do Proportional Electoral College Allocations Yield a More Representative Presidency?
Introduction
Welcome to Mini Project 03. This project focuses on analyzing voting patterns and political trends in U.S. elections from 1976 to 2022. By examining data across multiple election cycles, this analysis aims to uncover shifts in voter behavior, partisan changes, and regional dynamics over time. We will look into both presidential and congressional election results, highlighting key trends and outcomes that have shaped the political landscape.
Data I: US House Election Votes from 1976 to 2022
Our first step is to download the U.S. House 1976–2022 data file from the web. This data set records the votes from all biennial congressional races in all 50 states.
Initial code to set up the report
Setting up the code to suppress warnings
Code to download the U.S. House 1976–2022 data file
The following code shows how to download the U.S. House 1976–2022 data set.
# Download (once) and read the U.S. House election returns.
#
# The file is fetched from the Harvard Dataverse file-access endpoint and
# cached in the working directory; later calls reuse the cached copy.
#
# Returns: a base data.frame built from the tab-separated file.
get_ushouse_file <- function() {
  # The complete URL for direct file download using the fileId parameter
  FILE_URL <- "https://dataverse.harvard.edu/api/access/datafile/8963860"

  # Name of the local file to save the data
  local_file <- "1976-2022-house.tab"

  # Download the file if it does not already exist locally
  if (!file.exists(local_file)) {
    tryCatch(
      download.file(FILE_URL, destfile = local_file, mode = "wb"),
      error = function(e) {
        # Remove any partial file so a failed download is not mistaken for a
        # cached copy on the next run, then re-signal the original error.
        unlink(local_file)
        stop(e)
      }
    )
  }

  # Read the downloaded file into a data frame, using read_tsv for
  # tab-separated files
  as.data.frame(readr::read_tsv(local_file, lazy = FALSE))
}

# Call the function and read the data into a data frame
ushouse_data <- get_ushouse_file()

# Display the first few rows of the data to verify
head(ushouse_data)
year state state_po state_fips state_cen state_ic office district stage
1 1976 ALABAMA AL 1 63 41 US HOUSE 1 GEN
2 1976 ALABAMA AL 1 63 41 US HOUSE 1 GEN
3 1976 ALABAMA AL 1 63 41 US HOUSE 1 GEN
4 1976 ALABAMA AL 1 63 41 US HOUSE 2 GEN
5 1976 ALABAMA AL 1 63 41 US HOUSE 2 GEN
6 1976 ALABAMA AL 1 63 41 US HOUSE 2 GEN
runoff special candidate party writein mode
1 FALSE FALSE BILL DAVENPORT DEMOCRAT FALSE TOTAL
2 FALSE FALSE JACK EDWARDS REPUBLICAN FALSE TOTAL
3 FALSE FALSE WRITEIN <NA> TRUE TOTAL
4 FALSE FALSE J CAROLE KEAHEY DEMOCRAT FALSE TOTAL
5 FALSE FALSE WILLIAM L \\BILL\\ DICKINSON REPUBLICAN FALSE TOTAL
6 FALSE FALSE WRITEIN <NA> TRUE TOTAL
candidatevotes totalvotes unofficial version fusion_ticket
1 58906 157170 FALSE 20230706 FALSE
2 98257 157170 FALSE 20230706 FALSE
3 7 157170 FALSE 20230706 FALSE
4 66288 156362 FALSE 20230706 FALSE
5 90069 156362 FALSE 20230706 FALSE
6 5 156362 FALSE 20230706 FALSE
Secondly, we’ll load the statewide presidential vote counts data set, which covers 1976 to 2020.
Note that this time, we’ll read the CSV file directly from our local copy of the repository rather than downloading it from the website.
# Install and load the readr package if it's not already installed
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
library(readr)

# Path to the presidential returns, relative to the project directory so the
# script does not depend on one user's absolute Windows path.
# NOTE(review): assumes the script is run from the project folder that holds
# this CSV (the same folder the relative "congress_shapefiles" path resolves
# against) — confirm.
file_path <- "1976-2020-president.csv"

# Read the file, assuming it's a .csv file
uspresident_data <- read_csv(file_path)

# Display the first few rows of the dataset
head(uspresident_data)
# A tibble: 6 × 15
year state state_po state_fips state_cen state_ic office candidate
<dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr>
1 1976 ALABAMA AL 1 63 41 US PRESIDENT "CARTER, JI…
2 1976 ALABAMA AL 1 63 41 US PRESIDENT "FORD, GERA…
3 1976 ALABAMA AL 1 63 41 US PRESIDENT "MADDOX, LE…
4 1976 ALABAMA AL 1 63 41 US PRESIDENT "BUBAR, BEN…
5 1976 ALABAMA AL 1 63 41 US PRESIDENT "HALL, GUS"
6 1976 ALABAMA AL 1 63 41 US PRESIDENT "MACBRIDE, …
# ℹ 7 more variables: party_detailed <chr>, writein <lgl>,
# candidatevotes <dbl>, totalvotes <dbl>, version <dbl>, notes <lgl>,
# party_simplified <chr>
Data II: Congressional Boundary Files 1976 to 2012
Loading the required libraries:
Loading the required libraries for the document
library(dplyr)
library(tidyverse)
library(ggplot2)
library(stringr)
library(httr)
require(ggplot2)
require(sf)
Task 1: Code to download the Congressional Shapefiles 1976 - 2012
The following code shows how to download the Congressional Shapefiles.
library(httr)
library(sf)
# Base URL for downloading congressional shapefiles; the Congress number and
# ".zip" are appended to it to form each file's URL.
base_url <- "https://cdmaps.polisci.ucla.edu/shp/districts"

# Define the range of Congress numbers (from 94 to 113)
congresses <- 94:113

# Define the directory where the shapefiles will be saved, creating it on the
# first run
download_dir <- "congress_shapefiles"
if (!dir.exists(download_dir)) {
  dir.create(download_dir)
}
# Function to download the shapefile if not already downloaded
#
# Arguments:
#   congress     Congress number; appended to `base_url` and used in the local
#                file name.
#   base_url     URL prefix to which "<congress>.zip" is appended.
#   download_dir Existing directory in which the zip file is stored.
#
# Returns: the local path of the zip file, "<download_dir>/districts<congress>.zip".
# If the server answers with an HTTP error, the file is removed again and a
# warning is raised, so an error page is never kept as a cached "zip".
download_shapefile <- function(congress, base_url, download_dir) {
  # Construct the URL for the specific shapefile
  # NOTE(review): the Congress number is appended without zero-padding;
  # confirm the server's file naming for Congresses below 100.
  file_url <- paste0(base_url, congress, ".zip")

  # Define the local filename for the zip file
  zip_file <- file.path(download_dir, paste0("districts", congress, ".zip"))

  # Nothing to do when a previous run already fetched the file
  if (file.exists(zip_file)) {
    message(paste("File for Congress", congress, "already exists. Skipping download."))
    return(zip_file)
  }

  message(paste("Downloading Congress", congress, "shapefile..."))
  response <- httr::GET(file_url, httr::write_disk(zip_file, overwrite = TRUE))

  if (httr::http_error(response)) {
    # write_disk() has saved the error body; drop it so the next run retries
    unlink(zip_file)
    warning(
      "Download failed for Congress ", congress,
      " (HTTP status ", httr::status_code(response), "): ", file_url,
      call. = FALSE
    )
  }

  zip_file
}
# Function to unzip the downloaded shapefiles and load the .shp file
#
# Arguments:
#   zip_file Path to a "<name>.zip" archive.
#
# The archive is extracted into a sibling folder "<name>" unless that folder
# already exists. The shapefile is then expected at
# "<name>/districtShapes/<name>.shp".
#
# Returns: the result of sf::st_read() on that shapefile, or NULL (with a
# warning) when the file is not present.
unzip_and_load_shapefile <- function(zip_file) {
  # Remove .zip extension for folder name
  unzip_dir <- sub("\\.zip$", "", zip_file)

  # Unzip if the directory does not already exist
  if (!dir.exists(unzip_dir)) {
    unzip(zip_file, exdir = unzip_dir)
  }

  # Define the path to the .shp file (inside "districtShapes" folder)
  shapefile_path <- file.path(
    unzip_dir,
    "districtShapes",
    paste0(basename(unzip_dir), ".shp")
  )

  # Bail out early when the expected shapefile is missing
  if (!file.exists(shapefile_path)) {
    warning("Shapefile not found for Congress ", basename(unzip_dir))
    return(NULL)
  }

  sf::st_read(shapefile_path)
}
# Loop through all specified Congress numbers, download, and load each shapefile
for (congress in congresses) {
  zip_file <- download_shapefile(congress, base_url, download_dir)
  shapefile_data <- unzip_and_load_shapefile(zip_file)

  # Display the first few rows if successfully loaded
  if (!is.null(shapefile_data)) {
    print(paste("Data for Congress", congress))
    print(head(shapefile_data))
  }
}
Reading layer `districts99' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts99\districtShapes\districts99.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -178.3395 ymin: 18.91383 xmax: 177.7388 ymax: 71.3857
Geodetic CRS: GRS 1980(IUGG, 1980)
[1] "Data for Congress 99"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -117.2819 ymin: 31.6099 xmax: -73.83379 ymax: 42.4962
Geodetic CRS: GRS 1980(IUGG, 1980)
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE
1 California 006099102041 41 99 102 <NA> <NA> <NA>
2 New York 036099102010 10 99 102 <NA> <NA> <NA>
3 New York 036099102013 13 99 102 <NA> <NA> <NA>
4 Illinois 017098102012 12 98 102 <NA> <NA> <NA>
5 Mississippi 028099102001 1 99 102 <NA> <NA> <NA>
6 Mississippi 028099102002 2 99 102 <NA> <NA> <NA>
LAW
1 <NA>
2 <NA>
3 <NA>
4 <NA>
5 <NA>
6 <NA>
NOTE
1 {"Altered when inserting data from California_98cc/98cc_41cd_California.shp."}
2 {"Altered when inserting data from NewYork_98cc/98cc_10cd_NewYork.shp."}
3 {"Altered when inserting data from NewYork_98cc/98cc_13cd_NewYork.shp."}
4 <NA>
5 {"Altered when inserting data from Mississippi_98cc/98cc_1cd_Mississippi.shp."}
6 {"Altered when inserting data from Mississippi_98cc/98cc_2cd_Mississippi.shp."}
BESTDEC FINALNOTE RNOTE LASTCHANGE FROMCOUNTY
1 <NA> <NA> <NA> 2016-05-20 13:07:37.070991 F
2 <NA> <NA> <NA> 2016-05-20 13:09:36.525098 F
3 <NA> <NA> <NA> 2016-05-20 13:09:36.572309 F
4 <NA> {"From 1994 TigerLines"} <NA> 2016-05-20 13:04:54.104494 F
5 <NA> <NA> <NA> 2016-05-20 13:09:01.573128 F
6 <NA> <NA> <NA> 2016-05-20 13:09:01.604169 F
geometry
1 MULTIPOLYGON (((-117.2279 3...
2 MULTIPOLYGON (((-73.83544 4...
3 MULTIPOLYGON (((-73.9948 40...
4 MULTIPOLYGON (((-88.2635 42...
5 MULTIPOLYGON (((-88.20581 3...
6 MULTIPOLYGON (((-90.73618 3...
Reading layer `districts100' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts100\districtShapes\districts100.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -178.3395 ymin: 18.91383 xmax: 177.7388 ymax: 71.3857
Geodetic CRS: GRS 1980(IUGG, 1980)
[1] "Data for Congress 100"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -117.2819 ymin: 31.6099 xmax: -73.83379 ymax: 42.4962
Geodetic CRS: GRS 1980(IUGG, 1980)
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE
1 California 006099102041 41 99 102 <NA> <NA> <NA>
2 New York 036099102010 10 99 102 <NA> <NA> <NA>
3 New York 036099102013 13 99 102 <NA> <NA> <NA>
4 Illinois 017098102012 12 98 102 <NA> <NA> <NA>
5 Mississippi 028099102001 1 99 102 <NA> <NA> <NA>
6 Mississippi 028099102002 2 99 102 <NA> <NA> <NA>
LAW
1 <NA>
2 <NA>
3 <NA>
4 <NA>
5 <NA>
6 <NA>
NOTE
1 {"Altered when inserting data from California_98cc/98cc_41cd_California.shp."}
2 {"Altered when inserting data from NewYork_98cc/98cc_10cd_NewYork.shp."}
3 {"Altered when inserting data from NewYork_98cc/98cc_13cd_NewYork.shp."}
4 <NA>
5 {"Altered when inserting data from Mississippi_98cc/98cc_1cd_Mississippi.shp."}
6 {"Altered when inserting data from Mississippi_98cc/98cc_2cd_Mississippi.shp."}
BESTDEC FINALNOTE RNOTE LASTCHANGE FROMCOUNTY
1 <NA> <NA> <NA> 2016-05-20 13:07:37.070991 F
2 <NA> <NA> <NA> 2016-05-20 13:09:36.525098 F
3 <NA> <NA> <NA> 2016-05-20 13:09:36.572309 F
4 <NA> {"From 1994 TigerLines"} <NA> 2016-05-20 13:04:54.104494 F
5 <NA> <NA> <NA> 2016-05-20 13:09:01.573128 F
6 <NA> <NA> <NA> 2016-05-20 13:09:01.604169 F
geometry
1 MULTIPOLYGON (((-117.2279 3...
2 MULTIPOLYGON (((-73.83544 4...
3 MULTIPOLYGON (((-73.9948 40...
4 MULTIPOLYGON (((-88.2635 42...
5 MULTIPOLYGON (((-88.20581 3...
6 MULTIPOLYGON (((-90.73618 3...
Reading layer `districts101' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts101\districtShapes\districts101.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -178.3395 ymin: 18.91383 xmax: 177.7388 ymax: 71.3857
Geodetic CRS: GRS 1980(IUGG, 1980)
[1] "Data for Congress 101"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -117.2819 ymin: 31.6099 xmax: -73.83379 ymax: 42.4962
Geodetic CRS: GRS 1980(IUGG, 1980)
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE
1 California 006099102041 41 99 102 <NA> <NA> <NA>
2 New York 036099102010 10 99 102 <NA> <NA> <NA>
3 New York 036099102013 13 99 102 <NA> <NA> <NA>
4 Illinois 017098102012 12 98 102 <NA> <NA> <NA>
5 Mississippi 028099102001 1 99 102 <NA> <NA> <NA>
6 Mississippi 028099102002 2 99 102 <NA> <NA> <NA>
LAW
1 <NA>
2 <NA>
3 <NA>
4 <NA>
5 <NA>
6 <NA>
NOTE
1 {"Altered when inserting data from California_98cc/98cc_41cd_California.shp."}
2 {"Altered when inserting data from NewYork_98cc/98cc_10cd_NewYork.shp."}
3 {"Altered when inserting data from NewYork_98cc/98cc_13cd_NewYork.shp."}
4 <NA>
5 {"Altered when inserting data from Mississippi_98cc/98cc_1cd_Mississippi.shp."}
6 {"Altered when inserting data from Mississippi_98cc/98cc_2cd_Mississippi.shp."}
BESTDEC FINALNOTE RNOTE LASTCHANGE FROMCOUNTY
1 <NA> <NA> <NA> 2016-05-20 13:07:37.070991 F
2 <NA> <NA> <NA> 2016-05-20 13:09:36.525098 F
3 <NA> <NA> <NA> 2016-05-20 13:09:36.572309 F
4 <NA> {"From 1994 TigerLines"} <NA> 2016-05-20 13:04:54.104494 F
5 <NA> <NA> <NA> 2016-05-20 13:09:01.573128 F
6 <NA> <NA> <NA> 2016-05-20 13:09:01.604169 F
geometry
1 MULTIPOLYGON (((-117.2279 3...
2 MULTIPOLYGON (((-73.83544 4...
3 MULTIPOLYGON (((-73.9948 40...
4 MULTIPOLYGON (((-88.2635 42...
5 MULTIPOLYGON (((-88.20581 3...
6 MULTIPOLYGON (((-90.73618 3...
Reading layer `districts102' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts102\districtShapes\districts102.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -178.3395 ymin: 18.91383 xmax: 177.7388 ymax: 71.3857
Geodetic CRS: GRS 1980(IUGG, 1980)
[1] "Data for Congress 102"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -117.2819 ymin: 31.6099 xmax: -73.83379 ymax: 42.4962
Geodetic CRS: GRS 1980(IUGG, 1980)
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE
1 California 006099102041 41 99 102 <NA> <NA> <NA>
2 New York 036099102010 10 99 102 <NA> <NA> <NA>
3 New York 036099102013 13 99 102 <NA> <NA> <NA>
4 Illinois 017098102012 12 98 102 <NA> <NA> <NA>
5 Mississippi 028099102001 1 99 102 <NA> <NA> <NA>
6 Mississippi 028099102002 2 99 102 <NA> <NA> <NA>
LAW
1 <NA>
2 <NA>
3 <NA>
4 <NA>
5 <NA>
6 <NA>
NOTE
1 {"Altered when inserting data from California_98cc/98cc_41cd_California.shp."}
2 {"Altered when inserting data from NewYork_98cc/98cc_10cd_NewYork.shp."}
3 {"Altered when inserting data from NewYork_98cc/98cc_13cd_NewYork.shp."}
4 <NA>
5 {"Altered when inserting data from Mississippi_98cc/98cc_1cd_Mississippi.shp."}
6 {"Altered when inserting data from Mississippi_98cc/98cc_2cd_Mississippi.shp."}
BESTDEC FINALNOTE RNOTE LASTCHANGE FROMCOUNTY
1 <NA> <NA> <NA> 2016-05-20 13:07:37.070991 F
2 <NA> <NA> <NA> 2016-05-20 13:09:36.525098 F
3 <NA> <NA> <NA> 2016-05-20 13:09:36.572309 F
4 <NA> {"From 1994 TigerLines"} <NA> 2016-05-20 13:04:54.104494 F
5 <NA> <NA> <NA> 2016-05-20 13:09:01.573128 F
6 <NA> <NA> <NA> 2016-05-20 13:09:01.604169 F
geometry
1 MULTIPOLYGON (((-117.2279 3...
2 MULTIPOLYGON (((-73.83544 4...
3 MULTIPOLYGON (((-73.9948 40...
4 MULTIPOLYGON (((-88.2635 42...
5 MULTIPOLYGON (((-88.20581 3...
6 MULTIPOLYGON (((-90.73618 3...
Reading layer `districts103' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts103\districtShapes\districts103.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields (with 1 geometry empty)
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1473 ymin: 18.9177 xmax: 179.7785 ymax: 71.35256
Geodetic CRS: NAD83
[1] "Data for Congress 103"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -118.5992 ymin: 33.99573 xmax: -73.795 ymax: 42.63753
Geodetic CRS: NAD83
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE
1 California 006103107026 26 103 107 <NA> <NA> <NA>
2 California 006103107029 29 103 107 <NA> <NA> <NA>
3 California 006103107030 30 103 107 <NA> <NA> <NA>
4 New York 036103105007 7 103 105 <NA> <NA> <NA>
5 Illinois 017103107009 9 103 107 <NA> <NA> <NA>
6 Michigan 026103107012 12 103 107 <NA> <NA> <NA>
LAW NOTE BESTDEC FINALNOTE RNOTE LASTCHANGE
1 <NA> <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
2 <NA> <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
3 <NA> <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
4 <NA> <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
5 <NA> <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
6 <NA> <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
FROMCOUNTY geometry
1 FALSE MULTIPOLYGON (((-118.5075 3...
2 FALSE MULTIPOLYGON (((-118.354 34...
3 FALSE MULTIPOLYGON (((-118.184 34...
4 FALSE MULTIPOLYGON (((-73.8273 40...
5 FALSE MULTIPOLYGON (((-87.77996 4...
6 FALSE MULTIPOLYGON (((-83.03953 4...
Reading layer `districts104' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts104\districtShapes\districts104.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields (with 1 geometry empty)
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1473 ymin: 18.9177 xmax: 179.7785 ymax: 71.35256
Geodetic CRS: GRS 1980(IUGG, 1980)
[1] "Data for Congress 104"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -118.5992 ymin: 33.99573 xmax: -73.795 ymax: 42.63753
Geodetic CRS: GRS 1980(IUGG, 1980)
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE
1 California 006103107026 26 103 107 <NA> <NA> <NA>
2 California 006103107029 29 103 107 <NA> <NA> <NA>
3 California 006103107030 30 103 107 <NA> <NA> <NA>
4 New York 036103105007 7 103 105 <NA> <NA> <NA>
5 Illinois 017103107009 9 103 107 <NA> <NA> <NA>
6 Michigan 026103107012 12 103 107 <NA> <NA> <NA>
LAW NOTE BESTDEC RNOTE FROMCOUNTY LASTCHANGE
1 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
2 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
3 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
4 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
5 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
6 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
FINALNOTE geometry
1 {"From US Census website"} MULTIPOLYGON (((-118.5075 3...
2 {"From US Census website"} MULTIPOLYGON (((-118.354 34...
3 {"From US Census website"} MULTIPOLYGON (((-118.184 34...
4 {"From US Census website"} MULTIPOLYGON (((-73.8273 40...
5 {"From US Census website"} MULTIPOLYGON (((-87.77996 4...
6 {"From US Census website"} MULTIPOLYGON (((-83.03953 4...
Reading layer `districts105' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts105\districtShapes\districts105.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields (with 1 geometry empty)
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1473 ymin: 18.9177 xmax: 179.7785 ymax: 71.35256
Geodetic CRS: GRS 1980(IUGG, 1980)
[1] "Data for Congress 105"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -118.5992 ymin: 25.80332 xmax: -80.32127 ymax: 34.33793
Geodetic CRS: GRS 1980(IUGG, 1980)
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE
1 California 006103107026 26 103 107 <NA> <NA> <NA>
2 California 006103107029 29 103 107 <NA> <NA> <NA>
3 California 006103107030 30 103 107 <NA> <NA> <NA>
4 Florida 012105107013 13 105 107 <NA> <NA> <NA>
5 Florida 012105107014 14 105 107 <NA> <NA> <NA>
6 Florida 012105107015 15 105 107 <NA> <NA> <NA>
LAW NOTE BESTDEC RNOTE FROMCOUNTY LASTCHANGE
1 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
2 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
3 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
4 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
5 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
6 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
FINALNOTE geometry
1 {"From US Census website"} MULTIPOLYGON (((-118.5075 3...
2 {"From US Census website"} MULTIPOLYGON (((-118.354 34...
3 {"From US Census website"} MULTIPOLYGON (((-118.184 34...
4 {"From US Census website"} MULTIPOLYGON (((-82.42332 2...
5 {"From US Census website"} MULTIPOLYGON (((-82.04014 2...
6 {"From US Census website"} MULTIPOLYGON (((-80.71132 2...
Reading layer `districts106' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts106\districtShapes\districts106.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields (with 1 geometry empty)
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1473 ymin: 18.9177 xmax: 179.7785 ymax: 71.35256
Geodetic CRS: GRS 1980(IUGG, 1980)
[1] "Data for Congress 106"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -118.5992 ymin: 25.80332 xmax: -80.32127 ymax: 34.33793
Geodetic CRS: GRS 1980(IUGG, 1980)
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE
1 California 006103107026 26 103 107 <NA> <NA> <NA>
2 California 006103107029 29 103 107 <NA> <NA> <NA>
3 California 006103107030 30 103 107 <NA> <NA> <NA>
4 Florida 012105107013 13 105 107 <NA> <NA> <NA>
5 Florida 012105107014 14 105 107 <NA> <NA> <NA>
6 Florida 012105107015 15 105 107 <NA> <NA> <NA>
LAW NOTE BESTDEC RNOTE FROMCOUNTY LASTCHANGE
1 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
2 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
3 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
4 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
5 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
6 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
FINALNOTE geometry
1 {"From US Census website"} MULTIPOLYGON (((-118.5075 3...
2 {"From US Census website"} MULTIPOLYGON (((-118.354 34...
3 {"From US Census website"} MULTIPOLYGON (((-118.184 34...
4 {"From US Census website"} MULTIPOLYGON (((-82.42332 2...
5 {"From US Census website"} MULTIPOLYGON (((-82.04014 2...
6 {"From US Census website"} MULTIPOLYGON (((-80.71132 2...
Reading layer `districts107' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts107\districtShapes\districts107.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields (with 1 geometry empty)
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1473 ymin: 18.9177 xmax: 179.7785 ymax: 71.35256
Geodetic CRS: GRS 1980(IUGG, 1980)
[1] "Data for Congress 107"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -118.5992 ymin: 25.80332 xmax: -80.32127 ymax: 34.33793
Geodetic CRS: GRS 1980(IUGG, 1980)
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE
1 California 006103107026 26 103 107 <NA> <NA> <NA>
2 California 006103107029 29 103 107 <NA> <NA> <NA>
3 California 006103107030 30 103 107 <NA> <NA> <NA>
4 Florida 012105107013 13 105 107 <NA> <NA> <NA>
5 Florida 012105107014 14 105 107 <NA> <NA> <NA>
6 Florida 012105107015 15 105 107 <NA> <NA> <NA>
LAW NOTE BESTDEC RNOTE FROMCOUNTY LASTCHANGE
1 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
2 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
3 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
4 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
5 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
6 <NA> <NA> <NA> <NA> F 2016-05-29 16:44:10.857626
FINALNOTE geometry
1 {"From US Census website"} MULTIPOLYGON (((-118.5075 3...
2 {"From US Census website"} MULTIPOLYGON (((-118.354 34...
3 {"From US Census website"} MULTIPOLYGON (((-118.184 34...
4 {"From US Census website"} MULTIPOLYGON (((-82.42332 2...
5 {"From US Census website"} MULTIPOLYGON (((-82.04014 2...
6 {"From US Census website"} MULTIPOLYGON (((-80.71132 2...
Reading layer `districts108' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts108\districtShapes\districts108.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields (with 1 geometry empty)
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1473 ymin: 18.9177 xmax: 179.7785 ymax: 71.35256
Geodetic CRS: GRS 1980(IUGG, 1980)
[1] "Data for Congress 108"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -97.353 ymin: 25.60628 xmax: -80.25827 ymax: 36.4996
Geodetic CRS: GRS 1980(IUGG, 1980)
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE LAW
1 Texas 048108108018 18 108 108 <NA> <NA> <NA> <NA>
2 Texas 048108108024 24 108 108 <NA> <NA> <NA> <NA>
3 Florida 012108112021 21 108 112 <NA> <NA> <NA> <NA>
4 Texas 048108108030 30 108 108 <NA> <NA> <NA> <NA>
5 Texas 048108108032 32 108 108 <NA> <NA> <NA> <NA>
6 Arkansas 005108112003 3 108 112 <NA> <NA> <NA> <NA>
NOTE BESTDEC FINALNOTE RNOTE LASTCHANGE
1 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
2 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
3 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
4 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
5 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
6 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
FROMCOUNTY geometry
1 F MULTIPOLYGON (((-95.49837 2...
2 F MULTIPOLYGON (((-97.13459 3...
3 F MULTIPOLYGON (((-80.30847 2...
4 F MULTIPOLYGON (((-97.03409 3...
5 F MULTIPOLYGON (((-96.8447 32...
6 F MULTIPOLYGON (((-94.43377 3...
Reading layer `districts109' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts109\districtShapes\districts109.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields (with 1 geometry empty)
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1473 ymin: 18.9177 xmax: 179.7785 ymax: 71.35256
Geodetic CRS: GRS 1980(IUGG, 1980)
[1] "Data for Congress 109"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -94.61792 ymin: 24.5447 xmax: -80.03136 ymax: 36.4996
Geodetic CRS: GRS 1980(IUGG, 1980)
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE LAW
1 Florida 012108112021 21 108 112 <NA> <NA> <NA> <NA>
2 Arkansas 005108112003 3 108 112 <NA> <NA> <NA> <NA>
3 Florida 012108112017 17 108 112 <NA> <NA> <NA> <NA>
4 Florida 012108112018 18 108 112 <NA> <NA> <NA> <NA>
5 Florida 012108112020 20 108 112 <NA> <NA> <NA> <NA>
6 Florida 012108112022 22 108 112 <NA> <NA> <NA> <NA>
NOTE BESTDEC FINALNOTE RNOTE LASTCHANGE
1 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
2 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
3 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
4 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
5 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
6 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
FROMCOUNTY geometry
1 F MULTIPOLYGON (((-80.30847 2...
2 F MULTIPOLYGON (((-94.43377 3...
3 F MULTIPOLYGON (((-80.18457 2...
4 F MULTIPOLYGON (((-80.31227 2...
5 F MULTIPOLYGON (((-80.21477 2...
6 F MULTIPOLYGON (((-80.09986 2...
Reading layer `districts110' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts110\districtShapes\districts110.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields (with 1 geometry empty)
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1473 ymin: 18.9177 xmax: 179.7785 ymax: 71.35256
Geodetic CRS: GRS 1980(IUGG, 1980)
[1] "Data for Congress 110"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -94.61792 ymin: 24.5447 xmax: -80.03136 ymax: 36.4996
Geodetic CRS: GRS 1980(IUGG, 1980)
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE LAW
1 Florida 012108112021 21 108 112 <NA> <NA> <NA> <NA>
2 Arkansas 005108112003 3 108 112 <NA> <NA> <NA> <NA>
3 Florida 012108112017 17 108 112 <NA> <NA> <NA> <NA>
4 Florida 012108112018 18 108 112 <NA> <NA> <NA> <NA>
5 Florida 012108112020 20 108 112 <NA> <NA> <NA> <NA>
6 Florida 012108112022 22 108 112 <NA> <NA> <NA> <NA>
NOTE BESTDEC FINALNOTE RNOTE LASTCHANGE
1 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
2 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
3 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
4 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
5 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
6 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
FROMCOUNTY geometry
1 F MULTIPOLYGON (((-80.30847 2...
2 F MULTIPOLYGON (((-94.43377 3...
3 F MULTIPOLYGON (((-80.18457 2...
4 F MULTIPOLYGON (((-80.31227 2...
5 F MULTIPOLYGON (((-80.21477 2...
6 F MULTIPOLYGON (((-80.09986 2...
Reading layer `districts111' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts111\districtShapes\districts111.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields (with 1 geometry empty)
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1473 ymin: 18.9177 xmax: 179.7785 ymax: 71.35256
Geodetic CRS: GRS 1980(IUGG, 1980)
[1] "Data for Congress 111"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -94.61792 ymin: 24.5447 xmax: -80.03136 ymax: 36.4996
Geodetic CRS: GRS 1980(IUGG, 1980)
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE LAW
1 Florida 012108112021 21 108 112 <NA> <NA> <NA> <NA>
2 Arkansas 005108112003 3 108 112 <NA> <NA> <NA> <NA>
3 Florida 012108112017 17 108 112 <NA> <NA> <NA> <NA>
4 Florida 012108112018 18 108 112 <NA> <NA> <NA> <NA>
5 Florida 012108112020 20 108 112 <NA> <NA> <NA> <NA>
6 Florida 012108112022 22 108 112 <NA> <NA> <NA> <NA>
NOTE BESTDEC FINALNOTE RNOTE LASTCHANGE
1 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
2 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
3 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
4 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
5 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
6 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
FROMCOUNTY geometry
1 F MULTIPOLYGON (((-80.30847 2...
2 F MULTIPOLYGON (((-94.43377 3...
3 F MULTIPOLYGON (((-80.18457 2...
4 F MULTIPOLYGON (((-80.31227 2...
5 F MULTIPOLYGON (((-80.21477 2...
6 F MULTIPOLYGON (((-80.09986 2...
Reading layer `districts112' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts112\districtShapes\districts112.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields (with 1 geometry empty)
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1473 ymin: 18.9177 xmax: 179.7785 ymax: 71.35256
Geodetic CRS: GRS 1980(IUGG, 1980)
[1] "Data for Congress 112"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -94.61792 ymin: 24.5447 xmax: -80.03136 ymax: 36.4996
Geodetic CRS: GRS 1980(IUGG, 1980)
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE LAW
1 Florida 012108112021 21 108 112 <NA> <NA> <NA> <NA>
2 Arkansas 005108112003 3 108 112 <NA> <NA> <NA> <NA>
3 Florida 012108112017 17 108 112 <NA> <NA> <NA> <NA>
4 Florida 012108112018 18 108 112 <NA> <NA> <NA> <NA>
5 Florida 012108112020 20 108 112 <NA> <NA> <NA> <NA>
6 Florida 012108112022 22 108 112 <NA> <NA> <NA> <NA>
NOTE BESTDEC FINALNOTE RNOTE LASTCHANGE
1 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
2 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
3 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
4 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
5 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
6 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
FROMCOUNTY geometry
1 F MULTIPOLYGON (((-80.30847 2...
2 F MULTIPOLYGON (((-94.43377 3...
3 F MULTIPOLYGON (((-80.18457 2...
4 F MULTIPOLYGON (((-80.31227 2...
5 F MULTIPOLYGON (((-80.21477 2...
6 F MULTIPOLYGON (((-80.09986 2...
Reading layer `districts113' from data source
`C:\Users\krisf\OneDrive\Documentos\STA9750-2024-FALL\congress_shapefiles\districts113\districtShapes\districts113.shp'
using driver `ESRI Shapefile'
Simple feature collection with 436 features and 15 fields (with 1 geometry empty)
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1473 ymin: 18.91383 xmax: 179.7785 ymax: 71.35256
Geodetic CRS: NAD83
[1] "Data for Congress 113"
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -91.82307 ymin: 29.41135 xmax: -66.94983 ymax: 47.45969
Geodetic CRS: NAD83
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE LAW
1 Louisiana 022113114006 6 113 114 <NA> <NA> <NA> <NA>
2 Maine 023113114001 1 113 114 <NA> <NA> <NA> <NA>
3 Maine 023113114002 2 113 114 <NA> <NA> <NA> <NA>
4 Maryland 024113114001 1 113 114 <NA> <NA> <NA> <NA>
5 Maryland 024113114002 2 113 114 <NA> <NA> <NA> <NA>
6 Maryland 024113114003 3 113 114 <NA> <NA> <NA> <NA>
NOTE BESTDEC FINALNOTE RNOTE LASTCHANGE
1 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
2 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
3 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
4 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
5 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
6 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
FROMCOUNTY geometry
1 FALSE MULTIPOLYGON (((-91.82288 3...
2 FALSE MULTIPOLYGON (((-70.98905 4...
3 FALSE MULTIPOLYGON (((-71.08216 4...
4 FALSE MULTIPOLYGON (((-77.31156 3...
5 FALSE MULTIPOLYGON (((-76.8763 39...
6 FALSE MULTIPOLYGON (((-77.15622 3...
Data III : Congressional Boundary Files 2014 to Present
Task2 : Code to download the Congressional Shapefiles 2014-2022
Download congressional shapefiles from the US Census Bureau for all US Congresses from 2014 to 2022.
Grouping by years:
# Load necessary libraries
library(sf)
# Define the download directory
# Return the directory that holds the Census congressional-district shapefiles,
# creating it on first use.
# Returns: the relative directory path as a character scalar.
get_download_dir <- function() {
  dir <- "data/shapefiles/census_congressional_districts"
  # The path is three levels deep; without recursive = TRUE dir.create() fails
  # whenever the parent folders do not exist yet.
  if (!dir.exists(dir)) {
    dir.create(dir, recursive = TRUE)
  }
  dir
}
# Map each shapefile year (2014-2022) to the congressional session number that
# appears in the file name; the values below cover sessions 114 to 116.
years_sessions <- list(
  "2014" = "114", "2015" = "114", "2016" = "115",
  "2017" = "115", "2018" = "116", "2019" = "116",
  "2020" = "116", "2021" = "116", "2022" = "116"
)
# Define the base URL structure for Census Bureau shape files for congressional districts
# (the year is appended directly to this prefix).
base_url <- "http://www2.census.gov/geo/tiger/TIGER"
# Download, unzip, and display the first lines of congressional shapefiles.
#
# shapefile_list: a list of lists. Each entry needs a `year`; an optional
#   `session` overrides the lookup in `years_sessions`.
#
# For every entry whose zip archive is not yet in the download directory the
# archive is downloaded, extracted into "unzipped_<session>" and the head of the
# shapefile is printed. Problems are reported with message() and the loop
# continues with the next entry. Called for its side effects.
download_and_unzip_shapefiles <- function(shapefile_list) {
  download_dir <- get_download_dir()

  for (item in shapefile_list) {
    year <- as.character(item$year)
    # Scalar choice, so plain if/else rather than the vectorised ifelse()
    session <- if (!is.null(item$session)) item$session else years_sessions[[year]]
    if (is.null(session)) {
      message("No congressional session known for year ", year, "; skipping")
      next
    }

    # Define file name and download URL
    file_name <- paste0("tl_", year, "_us_cd", session, ".zip")
    url <- paste0(base_url, year, "/CD/", file_name)
    destfile <- file.path(download_dir, file_name)

    # Download and unzip if not already done
    if (!file.exists(destfile)) {
      tryCatch({
        message("Downloading: ", url)
        status <- download.file(url, destfile, method = "curl", mode = "wb")
        # With an external method a failed transfer is signalled through a
        # non-zero return status rather than an R error, so check it explicitly.
        if (!identical(as.integer(status), 0L)) {
          stop("download.file() returned status ", status)
        }
        message("Downloaded successfully: ", file_name)

        # Unzip file
        unzip_dir <- file.path(download_dir, paste0("unzipped_", session))
        if (!dir.exists(unzip_dir)) {
          dir.create(unzip_dir, recursive = TRUE)
        }
        unzip(destfile, exdir = unzip_dir)

        # Construct shapefile path and read it
        shp_file <- file.path(unzip_dir, paste0("tl_", year, "_us_cd", session, ".shp"))
        if (file.exists(shp_file)) {
          shapefile_data <- sf::st_read(shp_file)
          print(head(shapefile_data)) # Display the first few lines of the shapefile
        } else {
          message("Shapefile not found at: ", shp_file)
        }
      }, error = function(e) {
        # Remove a partial archive; otherwise the next run would report
        # "File already exists" and never retry.
        if (file.exists(destfile)) {
          unlink(destfile)
        }
        message("Failed to download ", url, ": ", conditionMessage(e))
      })
    } else {
      message("File already exists: ", file_name)
    }
  }
}
# List of shapefiles to download (one entry per shapefile year)
shapefiles_to_download <- list(
  list(year = 2014, state_code = "us"),
  list(year = 2016, state_code = "us"),
  list(year = 2020, state_code = "us"),
  list(year = 2022, state_code = "us")
)
# Run the function
download_and_unzip_shapefiles(shapefiles_to_download)
Reading one of the shapefiles to make sure information has been correctly downloaded.
# Define the path to the shapefile.
# `unzip_dir` is a local variable of download_and_unzip_shapefiles() and is not
# visible here, so the extraction folder ("unzipped_<session>" inside the
# download directory) is spelled out again.
unzip_dir <- file.path("data/shapefiles/census_congressional_districts", "unzipped_116")
shapefile_path <- file.path(unzip_dir, "tl_2020_us_cd116.shp")
Error in eval(expr, envir, enclos): object 'unzip_dir' not found
# Read the shapefile using sf
shapefile_data <- st_read(shapefile_path)
Error in eval(expr, envir, enclos): object 'shapefile_path' not found
# View the first few rows of the data
head(shapefile_data)
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -91.82307 ymin: 29.41135 xmax: -66.94983 ymax: 47.45969
Geodetic CRS: NAD83
STATENAME ID DISTRICT STARTCONG ENDCONG DISTRICTSI COUNTY PAGE LAW
1 Louisiana 022113114006 6 113 114 <NA> <NA> <NA> <NA>
2 Maine 023113114001 1 113 114 <NA> <NA> <NA> <NA>
3 Maine 023113114002 2 113 114 <NA> <NA> <NA> <NA>
4 Maryland 024113114001 1 113 114 <NA> <NA> <NA> <NA>
5 Maryland 024113114002 2 113 114 <NA> <NA> <NA> <NA>
6 Maryland 024113114003 3 113 114 <NA> <NA> <NA> <NA>
NOTE BESTDEC FINALNOTE RNOTE LASTCHANGE
1 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
2 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
3 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
4 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
5 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
6 <NA> <NA> {"From US Census website"} <NA> 2016-05-29 16:44:10.857626
FROMCOUNTY geometry
1 FALSE MULTIPOLYGON (((-91.82288 3...
2 FALSE MULTIPOLYGON (((-70.98905 4...
3 FALSE MULTIPOLYGON (((-71.08216 4...
4 FALSE MULTIPOLYGON (((-77.31156 3...
5 FALSE MULTIPOLYGON (((-76.8763 39...
6 FALSE MULTIPOLYGON (((-77.15622 3...
Task 3: Exploration of Vote Count Data
3.1 Variation in the number of seats in the US House of Representatives between 1976 and 2022
Which states have gained and lost the most seats in the US House of Representatives between 1976 and 2022?
Code to calculate the change in the number of seats in the US House of Representatives between 1976 and 2022.
library(dplyr)
library(tidyr)

# First we count the distinct districts per state and year
seats_per_state <- ushouse_data %>%
  group_by(year, state) %>%
  summarize(districts_count = n_distinct(district), .groups = "drop")

# Calculate the change between 1976 and 2022: one column per year, then subtract
seat_changes_1976_2022 <- seats_per_state %>%
  filter(year %in% c(1976, 2022)) %>%
  pivot_wider(names_from = year, values_from = districts_count) %>%
  mutate(seat_change = `2022` - `1976`)

# Separate the states with the most gains and losses
top_gains <- seat_changes_1976_2022 %>%
  arrange(desc(seat_change)) %>%
  head(5)
top_losses <- seat_changes_1976_2022 %>%
  arrange(seat_change) %>%
  head(5)
Tables and Bar Plots for the Top Gains and Losses.
# Load knitr for kable
library(knitr)
library(ggplot2)
library(dplyr)
# Display the table
top_gains %>%
  kable(caption = "Top 5 Most Gained Seats 1976 vs 2022")
Top 5 Most Gained Seats 1976 vs 2022 state 1976 2022 seat_change TEXAS 24 38 14 FLORIDA 15 28 13 CALIFORNIA 43 52 9 ARIZONA 4 9 5 GEORGIA 10 14 4 %>% top_losses kable(caption = "Top 5 Most Lost Seats 1976 vs 2022")
Top 5 Most Lost Seats 1976 vs 2022 state 1976 2022 seat_change NEW YORK 39 26 -13 OHIO 23 15 -8 PENNSYLVANIA 25 17 -8 ILLINOIS 24 17 -7 MICHIGAN 19 13 -6 # Plot for top gains <- ggplot(top_gains, aes(x = reorder(state, seat_change), y = seat_change)) + gain_plot geom_bar(stat = "identity", fill = "steelblue") + labs(title = "Top States by Seat Gains (1976 to 2022)", x = "State", y = "Change in Seats") + theme_minimal() + coord_flip() # Plot for top losses <- ggplot(top_losses, aes(x = reorder(state, seat_change), y = seat_change)) + loss_plot geom_bar(stat = "identity", fill = "firebrick") + labs(title = "Top States by Seat Losses (1976 to 2022)", x = "State", y = "Change in Seats") + theme_minimal() + coord_flip() print(gain_plot)
print (loss_plot)
Per the results, the states that gained the most seats between 1976 and 2022 are Texas, Florida, California, Arizona and Georgia. On the other hand, the states that lost the most seats are New York, Ohio, Pennsylvania, Illinois and Michigan.
#Listing all the datasets
list.files()
[1] "_quarto.yml" "1976-2020-president.csv"
[3] "1976-2020-president.tab" "1976-2022-house.tab"
[5] "2022_expenses.csv" "2022_fare_revenue.xlsx"
[7] "Borough Boundaries.zip" "cb_2018_us_cd116_5m.zip"
[9] "codebook-us-house-1976-2020.md" "congress_113th_shapefile.zip"
[11] "congress_shapefiles" "data"
[13] "docs" "ECV.csv"
[15] "ECVs.csv" "election_results.gif"
[17] "MP #01.R" "mp001_quarto.yml.qmd"
[19] "MP01.pdf" "mp01.qmd"
[21] "mp01_quarto.yml" "MP02 PDF.pdf"
[23] "mp02.qmd" "mp02test.R"
[25] "mp03-1.pdf" "mp03.pdf"
[27] "mp03.qmd" "mp03.rmarkdown"
[29] "mp03_cache" "mp03_files"
[31] "name.basics.tsv.gz" "README"
[33] "ridership.xlsx" "sources-president.tab"
[35] "STA9750-2024-FALL" "STA9750-2024-FALL.Rproj"
[37] "state_boundaries.zip" "test for GitHub"
[39] "title.basics.tsv.gz" "title.crew.tsv.gz"
[41] "title.episode.tsv.gz" "title.principals.tsv.gz"
[43] "title.ratings.tsv.gz" "Week 4 - Pre assigment.R"
# Loading US President Database
uspresident_data <- read.csv("1976-2020-president.csv")
# View() opens the interactive data viewer, so it is skipped when the document
# is rendered non-interactively.
if (interactive()) {
  View(uspresident_data)
}
# Loading US House Database
ushouse_data <- read.delim("1976-2022-house.tab")
if (interactive()) {
  View(ushouse_data)
}
3.2 New York State Fusion Parties Analysis.
New York State has a unique “fusion” voting system where one candidate can appear on multiple “lines” on the ballot and their vote counts are totaled. For instance, in 2022, Jerrold Nadler appeared on both the Democrat and Working Families party lines for NYS’ 12th Congressional District. He received 200,890 votes total (184,872 as a Democrat and 16,018 as WFP), easily defeating Michael Zumbluskas, who received 44,173 votes across three party lines (Republican, Conservative, and Parent).
Are there any elections in our data where the election would have had a different outcome if the “fusion” system was not used and candidates only received the votes they received from their “major party line” (Democrat or Republican) and not their total number of votes across all lines?
library(dplyr)
library(knitr)
library(ggplot2)

# New York rows only; each row is one candidate on one party line
ny_votes <- ushouse_data %>%
  filter(state == "NEW YORK")

# Fusion total: a candidate's votes summed over every party line in the race
nycandidate_total <- ny_votes %>%
  group_by(year, district, candidate) %>%
  mutate(candidatetotal = sum(candidatevotes, na.rm = TRUE)) %>%
  ungroup()

# Actual winner of each race (largest fusion total)
nycandidate_winner <- nycandidate_total %>%
  distinct(year, district, candidate, candidatetotal) %>%
  group_by(year, district) %>%
  slice_max(candidatetotal, n = 1, with_ties = FALSE) %>%
  ungroup()

# Counterfactual: keep only the Democrat / Republican line of each candidate
nycandidate_parties_total <- nycandidate_total %>%
  filter(party == "DEMOCRAT" | party == "REPUBLICAN")

# Winner of each race on major-party-line votes alone. This must start from the
# filtered table; starting from the unfiltered one would let minor-party lines
# compete as well.
nycandidate_parties_winner <- nycandidate_parties_total %>%
  group_by(year, district) %>%
  slice_max(candidatevotes, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(year, district, candidate, candidatevotes)

# Compare the two winners race by race. Joining on (year, district) keeps the
# rows aligned even when the two tables do not have the same number of races.
fusion_changed_races <- nycandidate_winner %>%
  inner_join(
    nycandidate_parties_winner,
    by = c("year", "district"),
    suffix = c("_fusion", "_major")
  ) %>%
  filter(candidate_fusion != candidate_major)

if (nrow(fusion_changed_races) > 0) {
  print("There are elections that would have had a different outcome.")
} else {
  print("Per data, elections would had not a different outcome.")
}
[1] "There are elections that would have had a different outcome."
#View(nycandidate_parties_winner)
Per the results, in our data analysis, yes, there are elections that would have had a different outcome if the candidate was running only under one party.
3.3 Analysis of different trends.
Do presidential candidates tend to run ahead of or run behind congressional candidates in the same state? That is, does a Democratic candidate for president tend to get more votes in a given state than all Democratic congressional candidates in the same state?
Does this trend differ over time? Does it differ across states or across parties? Are any presidents particularly more or less popular than their co-partisans?
Code to summarize the number of votes for US House and Presidential
library(scales)

# Summarize presidential votes (1976-2012)
presidential_votes <- uspresident_data %>%
  filter(office == "US PRESIDENT", year >= 1976, year <= 2012) %>%
  group_by(year) %>%
  summarise(total_president_votes = sum(candidatevotes, na.rm = TRUE), .groups = "drop")

# Summarize congressional votes (1976-2012)
congressional_votes <- ushouse_data %>%
  filter(office == "US HOUSE", year >= 1976, year <= 2012) %>%
  group_by(year) %>%
  summarise(total_congress_votes = sum(candidatevotes, na.rm = TRUE), .groups = "drop")

# Combine the two datasets (left join keeps the years present in presidential_votes)
vote_comparison <- presidential_votes %>%
  left_join(congressional_votes, by = "year")

# Calculate and format percentage difference.
# percent() returns text, so this column is for display only.
vote_comparison <- vote_comparison %>%
  mutate(vote_percentage_difference =
           percent((total_president_votes - total_congress_votes) / total_congress_votes))
#head(vote_comparison)
Table showing the difference between the % of voters for Presidents vs. Congress.
# The table covers 1976-2012, so the caption states that range
vote_comparison %>%
  kable(caption = "Vote Percentage Difference between President and Congress - 1976 to 2012")
year | total_president_votes | total_congress_votes | vote_percentage_difference |
---|---|---|---|
1976 | 81601344 | 74259171 | 9.89% |
1980 | 86496851 | 77873913 | 11.07% |
1984 | 92654861 | 82421874 | 12.42% |
1988 | 91586825 | 81682171 | 12.13% |
1992 | 104599780 | 97281410 | 7.52% |
1996 | 96389818 | 90745365 | 6.22% |
2000 | 105593982 | 98799965 | 6.88% |
2004 | 122349450 | 113191293 | 8.09% |
2008 | 131419253 | 122586298 | 7.21% |
2012 | 129139997 | 122345021 | 5.55% |
vote_comparison$year <- as.factor(vote_comparison$year)

# `vote_percentage_difference` is text produced by percent(), which ggplot would
# treat as a category. The bar height is therefore recomputed as a number from
# the vote totals and only the axis labels are formatted as percentages.
ggplot(
  vote_comparison,
  aes(
    x = year,
    y = (total_president_votes - total_congress_votes) / total_congress_votes
  )
) +
  geom_bar(stat = "identity", fill = "blue", color = "blue", width = 0.6) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Percentage Difference Presidential and Congressional Votes (1976 - 2012)",
    x = "Year",
    y = "Vote Percentage Difference (%)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
As the table above shows, presidential races drew more votes than House races in every year. The gap was largest in 1984 (12.42%) and 1988 (12.13%) and smallest in 2012 (5.55%). A larger gap likely indicates that more voters cast a presidential ballot without voting in their House race, while a smaller gap suggests more consistent voting across both contests. Note that these totals alone do not show which party controlled Congress or whether the president faced a Congress of the opposite party.
Comparison Analysis by State, between US House and President votes from 1976 to 2012.
The analysis is very similar to the previous one, but now we need to group by state.
Code to summarize the number of votes per state for US House and Presidential
library(scales)

# Summarize presidential votes (1976-2012)
state_presidential_votes <- uspresident_data %>%
  filter(office == "US PRESIDENT", year >= 1976, year <= 2012) %>%
  group_by(year, state) %>%
  summarise(total_president_votes = sum(candidatevotes, na.rm = TRUE), .groups = "drop")

# Summarize congressional votes (1976-2012)
state_congressional_votes <- ushouse_data %>%
  filter(office == "US HOUSE", year >= 1976, year <= 2012) %>%
  group_by(year, state) %>%
  summarise(total_congress_votes = sum(candidatevotes, na.rm = TRUE), .groups = "drop")

# Combine the two datasets
state_vote_comparison <- state_presidential_votes %>%
  left_join(state_congressional_votes, by = c("year", "state"))

# Calculate and format percentage difference (text, for display only)
state_vote_comparison <- state_vote_comparison %>%
  mutate(state_vote_percentage_difference =
           percent((total_president_votes - total_congress_votes) / total_congress_votes,
                   accuracy = 0.01))

state_vote_comparison %>%
  # Sort on the numeric ratio. Sorting the formatted text is alphabetical,
  # which places "9.9%" above "12.4%".
  arrange(desc((total_president_votes - total_congress_votes) / total_congress_votes)) %>%
  head(10) %>%
  kable(
    col.names = c("Year", "State", "Presidential Votes", "Congressional Votes", "Percentage Difference"),
    caption = "Top 10 Highest Percentage Differences between Presidential and Congressional Votes (1976-2012)"
  )
Year | State | Presidential Votes | Congressional Votes | Percentage Difference |
---|---|---|---|---|
1984 | ARKANSAS | 884406 | 463247 | 90.914566% |
2004 | NEW JERSEY | 3611691 | 3284595 | 9.958488% |
1992 | MARYLAND | 1984580 | 1805731 | 9.904521% |
1996 | VIRGINIA | 2416642 | 2199097 | 9.892469% |
2004 | TENNESSEE | 2437319 | 2218738 | 9.851591% |
1988 | MARYLAND | 1714358 | 1560984 | 9.825469% |
2004 | KENTUCKY | 1795882 | 1635243 | 9.823555% |
1980 | MARYLAND | 1540496 | 1403047 | 9.796464% |
2012 | GEORGIA | 3897839 | 3552967 | 9.706592% |
1992 | KENTUCKY | 1492900 | 1360911 | 9.698577% |
In this table, you’ll observe the top 10 highest percentage difference along the years by state.
Comparison Analysis by Party, between US House and President votes from 1976 to 2012.
In this analysis, we’ll take in consideration the party per year.
Code to summarize the number of votes per party for US House and Presidential
library(ggplot2)
library(dplyr)
library(tidyr)

# Aggregate votes by party for presidential and congressional candidates
congressional_party_votes <- ushouse_data %>%
  filter(office == "US HOUSE", year >= 1976, year <= 2012) %>%
  group_by(year, party) %>%
  summarise(congress_votes = sum(candidatevotes, na.rm = TRUE), .groups = "drop")

president_votes <- uspresident_data %>%
  filter(office == "US PRESIDENT", year >= 1976, year <= 2012) %>%
  group_by(year, party_simplified) %>%
  summarise(president_votes = sum(candidatevotes, na.rm = TRUE), .groups = "drop") %>%
  rename(party = party_simplified)

# Combine the data sets; parties without a House match get NA congress_votes
president_vote_comparison <- president_votes %>%
  left_join(congressional_party_votes, by = c("party", "year"))

# Reshape data for plotting (make sure tidyr is loaded)
vote_long <- president_vote_comparison %>%
  pivot_longer(cols = c(president_votes, congress_votes),
               names_to = "vote_type",
               values_to = "votes")

# Create a 'percentage' column for pie chart.
# na.rm = TRUE: a single unmatched party would otherwise make the whole
# year/vote_type total NA and blank out that pie.
vote_long <- vote_long %>%
  group_by(vote_type, year) %>%
  mutate(percentage = votes / sum(votes, na.rm = TRUE)) %>%
  ungroup()

# Plotting pie charts by year first, then by party
ggplot(vote_long, aes(x = "", y = percentage, fill = party)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y") +
  facet_grid(year ~ vote_type) +
  labs(title = "Distribution of Votes Over Time by Party for Presidential and Congressional Candidates (1976 - 2012)",
       fill = "Party") +
  theme_void() +
  theme(legend.position = "top") +
  scale_fill_brewer(palette = "Set3")
As we can see in these pie charts over the years, in 1992, the Republican Party lost votes to other parties. However, in 2012, the contest was very close between the Democratic and Republican parties.
Importing and Plotting Shape File Data
Task 4: Automate Zip File Extraction
library(ggplot2)
library(sf)
# File name
zip_file <- "Borough Boundaries.zip"

# Check if the file exists
if (!file.exists(zip_file)) {
  stop("The zip file does not exist in the specified path.")
}
# Read the .shp file contained in a zip archive.
#
# zip_file: path to the zip archive.
# Returns: the object produced by read_sf() for the shapefile in the archive.
# Stops with an error when the archive holds no .shp file.
read_shp_from_zip <- function(zip_file) {
  # Extract into the session temporary directory; unzip() returns the paths
  # of the extracted files
  td <- tempdir()
  zip_contents <- unzip(zip_file, exdir = td)

  # Identify the .shp file among the extracted contents
  fname_shp <- zip_contents[grepl("\\.shp$", zip_contents)]

  # Check if a .shp file was found
  if (length(fname_shp) == 0) {
    stop("No .shp file found in the zip archive.")
  }

  # Read the shapefile using read_sf; read only the first match, since several
  # paths at once are not a valid single data source
  nyc_sf <- read_sf(fname_shp[1])

  # Return the shapefile data
  nyc_sf
}
# Use the function to read the shapefile from the zip
nyc_sf <- read_shp_from_zip(zip_file)
Choropleth Map: Borough boundaries using ‘sf geom’.
head(nyc_sf)
Simple feature collection with 5 features and 4 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
Geodetic CRS: WGS 84
# A tibble: 5 × 5
boro_code boro_name shape_area shape_leng geometry
<dbl> <chr> <dbl> <dbl> <MULTIPOLYGON [°]>
1 3 Brooklyn 1934142776. 728147. (((-73.86327 40.58388, -73.863…
2 5 Staten Island 1623618684. 325910. (((-74.05051 40.56642, -74.050…
3 1 Manhattan 636646082. 360038. (((-74.01093 40.68449, -74.011…
4 2 Bronx 1187174772. 463181. (((-73.89681 40.79581, -73.896…
5 4 Queens 3041418004. 888197. (((-73.82645 40.59053, -73.826…
# Draw the boroughs, shading each polygon by its shape_area value
borough_aes <- aes(geometry = geometry, fill = shape_area)
ggplot(data = nyc_sf, mapping = borough_aes) +
  geom_sf()
Task 5: 2000 Presidential Election Electoral College Results
Code to download and unzip the file for State Boundaries and analyze this file with the US Presidential dataset.
library(ggplot2)
library(sf)
# Download the state boundary archive once. mode = "wb" writes it as binary;
# without it a zip file can be corrupted on Windows.
if (!file.exists("state_boundaries.zip")) {
  download.file("https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_500k.zip",
                destfile = "state_boundaries.zip",
                mode = "wb")
}
# Read the .shp file contained in a zip archive.
#
# zip_file: path to the zip archive.
# Returns: the object produced by st_read() for the shapefile in the archive.
read_shp_from_zip <- function(zip_file) {
  # Create a temporary directory
  td <- tempdir()
  # Extract the contents of the zip file that was passed in (the argument was
  # previously ignored in favour of a fixed file name)
  zip_contents <- unzip(zip_file, exdir = td)
  # Identify the .shp file among the extracted contents; the dot is escaped so
  # only a real ".shp" extension matches
  fname_shp <- zip_contents[grepl("\\.shp$", zip_contents)]
  if (length(fname_shp) == 0) {
    stop("No .shp file found in the zip archive.")
  }
  # Read the shapefile into R using st_read
  states_sf <- st_read(fname_shp[1])
  states_sf
}
states_sf <- read_shp_from_zip("state_boundaries.zip")
Reading layer `cb_2018_us_state_500k' from data source
`C:\Users\krisf\AppData\Local\Temp\RtmpcJgpbK\cb_2018_us_state_500k.shp'
using driver `ESRI Shapefile'
Simple feature collection with 56 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1489 ymin: -14.5487 xmax: 179.7785 ymax: 71.36516
Geodetic CRS: NAD83
# Reshape the full dataset with an emphasis on the 2000 election:
# one row per state with Democratic and Republican votes, total votes and winner
presidentvotes2000_summary <- uspresident_data %>%
  filter(year == 2000, party_detailed %in% c("DEMOCRAT", "REPUBLICAN")) %>%
  group_by(state, party_detailed) %>%
  summarise(candidatevotes = sum(candidatevotes), .groups = "drop") %>%
  pivot_wider(names_from = party_detailed, values_from = candidatevotes, values_fill = list(candidatevotes = 0)) %>%
  rename(Democratic_votes = DEMOCRAT, Republican_votes = REPUBLICAN) %>%
  left_join(
    # State-level total votes for 2000, de-duplicated across candidate rows
    uspresident_data %>%
      filter(year == 2000) %>%
      select(state, totalvotes) %>%
      distinct(),
    by = "state"
  ) %>%
  mutate(
    Winner = case_when(
      Democratic_votes > Republican_votes ~ "DEMOCRAT",
      Republican_votes > Democratic_votes ~ "REPUBLICAN",
      TRUE ~ "TIE"
    )
  ) %>%
  select(state, Democratic_votes, Republican_votes, Total_votes = totalvotes, Winner)
# Merge the shapefiles and presidential election data
states_sf <- states_sf %>%
  rename("state" = "NAME")
# Upper-case the state names before joining on them
states_sf$state <- toupper(states_sf$state)
states_sf <- states_sf %>%
  left_join(presidentvotes2000_summary, by = c("state"))
Choropleth Visualization of the 2000 Presidential Election Electoral College Results
# Map of the 2000 winner per state, cropped to the given longitude/latitude window
party_colours <- c("DEMOCRAT" = "blue", "REPUBLICAN" = "red")
map_window <- coord_sf(xlim = c(-130, -65), ylim = c(24, 50), expand = FALSE)

ggplot(data = states_sf) +
  geom_sf(mapping = aes(fill = Winner)) +
  scale_fill_manual(values = party_colours) +
  theme_minimal() +
  labs(title = "US Election Results by State (2000)", fill = "Party") +
  theme(legend.position = "bottom") +
  map_window
Task 6: Advanced Choropleth Visualization of Electoral College Results
Code to download and unzip the file for State Boundaries and analyze this file with the US Presidential dataset.
# Download `url` to `destfile` unless `destfile` already exists.
#
# url:      address of the file to fetch.
# destfile: local path to write to.
# A failed download is turned into a warning instead of an error, and any
# partially written file is removed so a later call tries again.
safe_download <- function(url, destfile) {
  if (file.exists(destfile)) {
    return(invisible(NULL))
  }
  tryCatch(
    download.file(url, destfile, mode = "wb"),
    error = function(e) {
      if (file.exists(destfile)) {
        unlink(destfile)
      }
      warning("Failed to download ", url, ": ", conditionMessage(e), call. = FALSE)
    }
  )
}
safe_download("https://drive.usercontent.google.com/uc?id=1tkRPPYIKpOICUdO1eJfrHAnHhF7RSorG&export=download", "cb_2018_us_cd116_5m.zip")
##-
# Extract the congressional-district archive into the session temp directory
td <- tempdir()
zip_contents <- unzip("cb_2018_us_cd116_5m.zip", exdir = td)
# Pick out the shapefile components by extension (dot escaped so that only the
# real extension matches)
fname_shp <- zip_contents[grepl("\\.shp$", zip_contents)]
fname_dbf <- zip_contents[grepl("\\.dbf$", zip_contents)]
fname_shx <- zip_contents[grepl("\\.shx$", zip_contents)]

# Load the shapefile
shapefile_path <- "cb_2018_us_cd116_5m.shp"
# The file is read from the extracted path, not from shapefile_path
us_sf <- read_sf(fname_shp)
us_sf
Simple feature collection with 441 features and 8 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1473 ymin: -14.55255 xmax: 179.7785 ymax: 71.35256
Geodetic CRS: NAD83
# A tibble: 441 × 9
STATEFP CD116FP AFFGEOID GEOID LSAD CDSESSN ALAND AWATER
<chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 36 05 5001600US3605 3605 C2 116 134550083 184506173
2 01 03 5001600US0103 0103 C2 116 19537640555 471223121
3 01 02 5001600US0102 0102 C2 116 26269902298 303582941
4 08 02 5001600US0802 0802 C2 116 19523675148 236595196
5 12 07 5001600US1207 1207 C2 116 1017471375 110917454
6 12 05 5001600US1205 1205 C2 116 9888522481 239830493
7 29 06 5001600US2906 2906 C2 116 47132969024 528128323
8 29 08 5001600US2908 2908 C2 116 51543398547 444793592
9 24 03 5001600US2403 2403 C2 116 788544340 252344417
10 24 05 5001600US2405 2405 C2 116 3840096355 2066469028
# ℹ 431 more rows
# ℹ 1 more variable: geometry <MULTIPOLYGON [°]>
Advanced Choropleth Visualization of Electoral College Results (2000)
# Create the repeated data for animation: five copies of us_sf, each with its
# own `frame` number and a fresh column of random values.
# The number of draws follows nrow(us_sf) instead of a hard-coded 441.
us_sf_repeats <- bind_rows(
  lapply(seq_len(5), function(i) {
    us_sf |> mutate(value = rnorm(nrow(us_sf)), frame = as.numeric(i))
  })
)
bbox <- st_bbox(us_sf)
# Create the plot with adjusted scaling and color
library(ggplot2)
library(sf)
library(gganimate)
#install.packages("viridis")
library(viridis)
bbox <- st_bbox(us_sf_repeats) # Set bounding box based on spatial data

# Plot with adjusted scaling, color, and title; transition_time() animates over
# the `frame` column
ggplot(us_sf_repeats, aes(geometry = geometry, fill = value)) +
  geom_sf() +
  transition_time(frame) +
  scale_fill_viridis_c(option = "magma") +
  theme_minimal(base_size = 14) +
  labs(
    title = "US Election results (2000)",
    fill = "Votes"
  ) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 18)
  ) +
  coord_sf(
    xlim = c(-130, -60),
    ylim = c(20, 50),
    expand = FALSE
  )
Evaluating Fairness of ECV Allocation Schemes
Task 7: Evaluating Fairness of ECV Allocation Schemes
Write a fact check evaluating the fairness of the different ECV electoral allocation schemes. To do so, you should first determine which allocation scheme you consider “fairest”.
Analysis and Background:
ECV stands for Electoral College Votes: the votes each state casts for president in the Electoral College. An ECV allocation scheme is the rule used to divide a state’s electoral votes among the candidates, and schemes differ in how closely the resulting allocation reflects the preferences of the voters. This is analogous to proportional representation or other electoral systems that aim to allocate seats or power in a way that matches voter preferences.
The Proportional Representation scheme seems to be the fairest, as it allocates ECVs based on the actual proportion of votes each candidate receives, thereby more closely reflecting the will of the people. The Winner-Takes-All system, while simpler, can lead to outcomes that don’t reflect the majority’s preference, as seen in the 2000 election.
In this analysis, we’ll compare the “Winner-Takes-All” (WTA) scheme with the Proportional Representation scheme.
- First we need a summarized view of the elections results in 2000, per party and the winner for each state.
library(dplyr)
library(tidyr)
# Filter for the 2000 election and focus on the Democratic and Republican parties:
# one row per state with both parties' votes, total votes and the winner
presidentvotes2000_summary <- uspresident_data %>%
  filter(year == 2000, party_detailed %in% c("DEMOCRAT", "REPUBLICAN")) %>%
  group_by(state, party_detailed) %>%
  summarise(candidatevotes = sum(candidatevotes), .groups = "drop") %>%
  pivot_wider(names_from = party_detailed, values_from = candidatevotes, values_fill = list(candidatevotes = 0)) %>%
  rename(Democratic_votes = DEMOCRAT, Republican_votes = REPUBLICAN) %>%
  left_join(
    # State-level total votes for 2000, de-duplicated across candidate rows
    uspresident_data %>%
      filter(year == 2000) %>%
      select(state, totalvotes) %>%
      distinct(),
    by = "state"
  ) %>%
  mutate(
    Winner = case_when(
      Democratic_votes > Republican_votes ~ "DEMOCRAT",
      Republican_votes > Democratic_votes ~ "REPUBLICAN",
      TRUE ~ "TIE"
    )
  ) %>%
  select(state, Democratic_votes, Republican_votes, Total_votes = totalvotes, Winner)
- We need to create the function for both approaches: WTA and Proportional.
# Load the manually created ECV data CSV.
# A project-relative path is used so the document renders on machines other
# than the author's; assumes ECVs.csv sits in the working directory, like the
# other data files read by relative path - TODO confirm.
state_ecv_data <- read.csv("ECVs.csv")
# Winner-Takes-All function.
#
# data:           per-state results with `state` and `Winner` columns.
# state_ecv_data: table with `state` and `ECVs` columns.
# Returns: state, Winner, ECVs and `ecv`, where `ecv` is the state's full ECVs
#   when the winner is DEMOCRAT or REPUBLICAN and 0 otherwise (e.g. "TIE").
calculate_wta <- function(data, state_ecv_data) {
  data <- data %>%
    left_join(state_ecv_data, by = "state") %>%
    mutate(
      # One test replaces the two summed ifelse() calls; as.numeric() keeps the
      # column a double, as before
      ecv = ifelse(Winner == "DEMOCRAT" | Winner == "REPUBLICAN", as.numeric(ECVs), 0)
    ) %>%
    select(state, Winner, ECVs, ecv)
  data
}
# Proportional Representation approach
#
# Splits each state's ECVs between the two major parties according to their
# share of the state's total votes.
#
# Arguments:
#   data           - per-state summary with columns `state`,
#                    `Democratic_votes`, `Republican_votes`, `Total_votes`.
#   state_ecv_data - lookup table with columns `state` and `ECVs`.
# Returns:
#   One row per input row with the columns state, democrat_ecv and
#   republican_ecv.
# Each party's share is rounded on its own and is taken relative to
# `Total_votes`, so the two columns do not have to add up to the state's ECVs.
# Warns when a state has no ECVs after the join, because those rows would
# otherwise produce NA allocations unnoticed.
calculate_proportional <- function(data, state_ecv_data) {
  with_ecvs <- data %>%
    left_join(state_ecv_data, by = "state")

  # States missing from the lookup table (or spelled differently) get NA ECVs
  unmatched <- unique(with_ecvs$state[is.na(with_ecvs$ECVs)])
  if (length(unmatched) > 0) {
    warning(
      "No ECVs found for: ", paste(unmatched, collapse = ", "),
      call. = FALSE
    )
  }

  with_ecvs %>%
    mutate(
      democrat_ecv = round((Democratic_votes / Total_votes) * ECVs),
      republican_ecv = round((Republican_votes / Total_votes) * ECVs)
    ) %>%
    select(state, democrat_ecv, republican_ecv)
}
- Setting up the Electoral College Votes (ECVs) per state.
# Calculate the results under each scheme from the 2000 state summary
wta_results <- calculate_wta(presidentvotes2000_summary, state_ecv_data)
proportional_results <- calculate_proportional(
  presidentvotes2000_summary,
  state_ecv_data
)
# You can now compare the results: print the Winner-Takes-All allocation
wta_results
# A tibble: 51 × 4
state Winner ECVs ecv
<chr> <chr> <int> <dbl>
1 ALABAMA REPUBLICAN 9 9
2 ALASKA REPUBLICAN 3 3
3 ARIZONA REPUBLICAN 11 11
4 ARKANSAS REPUBLICAN 6 6
5 CALIFORNIA DEMOCRAT 54 54
6 COLORADO REPUBLICAN 10 10
7 CONNECTICUT DEMOCRAT 7 7
8 DELAWARE DEMOCRAT 3 3
9 DISTRICT OF COLUMBIA DEMOCRAT NA NA
10 FLORIDA REPUBLICAN 30 30
# ℹ 41 more rows
# Print the proportional allocation
proportional_results
# A tibble: 51 × 3
state democrat_ecv republican_ecv
<chr> <dbl> <dbl>
1 ALABAMA 4 5
2 ALASKA 1 2
3 ARIZONA 5 6
4 ARKANSAS 3 3
5 CALIFORNIA 29 22
6 COLORADO 4 5
7 CONNECTICUT 4 3
8 DELAWARE 2 1
9 DISTRICT OF COLUMBIA NA NA
10 FLORIDA 15 15
# ℹ 41 more rows
- Now, we need to apply the schemes and compare the results.
# Calculate the results under each scheme from the 2000 state summary
wta_results <- calculate_wta(presidentvotes2000_summary, state_ecv_data)
proportional_results <- calculate_proportional(
  presidentvotes2000_summary,
  state_ecv_data
)
# You can now compare the results: print the Winner-Takes-All allocation
wta_results
# A tibble: 51 × 4
state Winner ECVs ecv
<chr> <chr> <int> <dbl>
1 ALABAMA REPUBLICAN 9 9
2 ALASKA REPUBLICAN 3 3
3 ARIZONA REPUBLICAN 11 11
4 ARKANSAS REPUBLICAN 6 6
5 CALIFORNIA DEMOCRAT 54 54
6 COLORADO REPUBLICAN 10 10
7 CONNECTICUT DEMOCRAT 7 7
8 DELAWARE DEMOCRAT 3 3
9 DISTRICT OF COLUMBIA DEMOCRAT NA NA
10 FLORIDA REPUBLICAN 30 30
# ℹ 41 more rows
# Print the proportional allocation
proportional_results
# A tibble: 51 × 3
state democrat_ecv republican_ecv
<chr> <dbl> <dbl>
1 ALABAMA 4 5
2 ALASKA 1 2
3 ARIZONA 5 6
4 ARKANSAS 3 3
5 CALIFORNIA 29 22
6 COLORADO 4 5
7 CONNECTICUT 4 3
8 DELAWARE 2 1
9 DISTRICT OF COLUMBIA NA NA
10 FLORIDA 15 15
# ℹ 41 more rows
- Let’s Visualize the Results:
library(ggplot2)
library(dplyr)
library(viridis)
# Prepare data for WTA and Proportional allocation schemes.
# Under WTA the winner's column holds the state's ECVs and the other party's
# column holds 0.
wta_results <- wta_results %>%
  mutate(
    Scheme = "WTA",
    Democrat_ECV = ifelse(Winner == "DEMOCRAT", ecv, 0),
    Republican_ECV = ifelse(Winner == "REPUBLICAN", ecv, 0)
  )
proportional_results <- proportional_results %>%
  mutate(Scheme = "Proportional")

# Combine the datasets, aligning both schemes on the same column names
combined_results <- bind_rows(
  wta_results %>%
    select(state, Scheme, Democrat_ECV, Republican_ECV),
  proportional_results %>%
    select(
      state,
      Scheme,
      Democrat_ECV = democrat_ecv,
      Republican_ECV = republican_ecv
    )
)

# Reshape data for ggplot: one row per state, scheme and party
combined_long <- combined_results %>%
  pivot_longer(
    cols = c(Democrat_ECV, Republican_ECV),
    names_to = "Party",
    values_to = "ECV"
  )

# Plot for comparing WTA and Proportional allocation.
# Only one fill scale is added: a second fill scale replaces the first, so the
# scale_fill_manual() blue/red mapping that used to precede
# scale_fill_viridis() was never applied. The viridis scale is the one that
# was actually rendered, so it is the one kept here.
ggplot(combined_long, aes(x = state, y = ECV, fill = Party)) +
  geom_bar(stat = "identity", position = "stack") +
  # Facet by Scheme to compare WTA and Proportional
  facet_wrap(~ Scheme, ncol = 1, scales = "free_y") +
  scale_fill_viridis(discrete = TRUE, option = "C") +
  theme_minimal() +
  labs(title = "ECV Allocation Comparison by Scheme", x = "State", y = "ECVs") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
As shown in these graphs, there is a noticeable difference between the two allocation methods. The Proportional approach provides a more balanced distribution of Electoral College Votes (ECVs) for each party, reflecting the actual proportion of votes received. In contrast, the Winner-Takes-All (WTA) method results in a more polarized outcome, with states like California and Texas allocating all of their ECVs to a single party, regardless of close vote margins.
Thank you.