%matplotlib inline
import geopandas
import pandas as pd
import xlwings
import plotnine
import statsmodels
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
# Read the county boundary shapefile that the choropleth maps are drawn from.
# NOTE(review): absolute, machine-specific path -- every other input is read from data/.
dnr_counties = geopandas.read_file('/Users/alys/Downloads/County_Boundaries_24K.shp')
# Build an upper-case "County" key and rewrite "SAINT CROIX" as "ST. CROIX";
# presumably this matches the spelling in the tables merged on "County" later -- confirm.
dnr_counties["County"] = dnr_counties["COUNTY_NAM"].str.upper()
dnr_counties["County"] = dnr_counties["County"].replace("SAINT CROIX", "ST. CROIX")
#dnr_counties.head()
Get and clean a number of files provided by the state.
# Registered-voter counts by county, one workbook per snapshot.
registered_2018_wb = pd.read_excel("data/registeredvotersbycounty_11_01_2018_xlsx_77758.xlsx")
registered_2016_wb = pd.read_excel("data/registeredvotersbycounty_xlsx_45404.xlsx") #pretty sure this is 2016??
# The older summary workbook is read from its "Voters by County" sheet.
registered_2014_prior = pd.read_excel("data/voter_registration_summary_statistics_xlsx_11280.xlsx", sheet_name="Voters by County")
# Reshape wide -> long: every non-County column name becomes an "election" value
# and its cells become "registered_voters".
registered_2014_prior_long = registered_2014_prior.melt(id_vars="County", var_name="election", value_name="registered_voters")
registered_2014_prior_long.columns
# The 2016 workbook has report header rows above the table: row 5 holds the
# real column names and the county rows are taken from positions 8-79.
cols = registered_2016_wb.iloc[5]
registered_2016_wb.columns = cols
# reset_index() returns a new frame, so its result has to be kept (the bare call
# was a no-op). It also yields an independent copy, so the column assignments
# below do not write into a slice of the raw workbook.
registered_2016 = registered_2016_wb[["County", "Registered Voters"]][8:80].reset_index(drop=True)
# A scalar broadcasts to every row; no index-aligned Series of guessed length needed.
registered_2016["election"] = "11/8/2016"
registered_2016.columns = ['County', 'registered_voters', 'election']
registered_2016.head()
# Keep the first 72 rows of the 2018 workbook. The explicit copy makes the
# column assignments below act on an independent frame instead of a slice.
registered_2018 = registered_2018_wb[["County", "Registered Voters"]][0:72].copy()
# A scalar broadcasts to every row; no index-aligned Series of guessed length needed.
# NOTE(review): the label says 11/8 but the workbook file name says 11_01_2018 --
# confirm which date is meant. The label is not used for filtering in this file.
registered_2018["election"] = "11/8/2018"
registered_2018.columns = ['County', 'registered_voters', 'election']
registered_2018.head()
# Stack all registration snapshots into one long table (County, registered_voters, election).
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent
# and keeps the same row order: 2018, then 2016, then the earlier elections.
registered_all = pd.concat([registered_2018, registered_2016, registered_2014_prior_long]) #.sort_values("County")
registered_all.tail()
#registered_all.reset_index()
# Spot check: Adams County rows with more than 11516 registered voters.
registered_all[(registered_all['County'] == 'ADAMS COUNTY') & (registered_all['registered_voters'] > 11516)]
# Midterm comparison: pair each county's 2018 registration with its "11/01/2014" row.
# Columns present on both sides get the _2018 / _2014 suffixes.
registered_midterm_d = registered_2018.merge(registered_all[registered_all['election'] =="11/01/2014"] ,left_on="County", right_on="County", suffixes=("_2018", "_2014"))
registered_midterm_d["midterm_registered_change"] =registered_midterm_d["registered_voters_2018"] - registered_midterm_d["registered_voters_2014"]
# Drop the " COUNTY" suffix from the county names.
registered_midterm_d["County"] = registered_midterm_d["County"].str.replace(" COUNTY", "")
registered_midterm_d.head()
# Presidential comparison: the same steps for 2016 against the "11/05/2012" rows.
registered_presidential_d = registered_2016.merge(registered_all[registered_all['election'] =="11/05/2012"] ,left_on="County", right_on="County", suffixes=("_2016", "_2012"))
registered_presidential_d["presidential_registered_change"] =registered_presidential_d["registered_voters_2016"] - registered_presidential_d["registered_voters_2012"]
registered_presidential_d["County"] = registered_presidential_d["County"].str.replace(" COUNTY", "")
registered_presidential_d.head()
# Raw county-by-county result workbooks; three of the four are read from their second sheet.
votes_2018_wb = pd.read_excel("data/County by County Report-2018 Gen Election-State Constitutional Offices.xlsx", sheet_name=1)
votes_2016_wb = pd.read_excel("data/County by County Report President of the United States Recount_2016.xlsx")
votes_2014_wb = pd.read_excel("data/County by County Report_2014.xlsx", sheet_name=1)
votes_2012_wb = pd.read_excel("data/County by County_11.6.12.xls", sheet_name=1)
def process_vote_df(vote_df):
    """Turn a raw county-by-county results sheet into a tidy frame.

    Rows 4 and 5 of ``vote_df`` are treated as a two-level header and joined
    into ``"top (bottom)"`` column labels. The first column is then named
    ``"County"`` and the third ``"total_votes"``. Rows from position 6 onward
    are the data.

    Args:
        vote_df: Frame as read from the results workbook, header rows included.

    Returns:
        A new DataFrame (``vote_df`` is not modified) with the combined column
        labels, whitespace-stripped ``County`` values, and ``total_votes`` as
        float64. Cells that cannot be parsed as numbers become NaN.
    """
    top_label = vote_df.iloc[4]
    bottom_label = vote_df.iloc[5]
    cols = top_label + " (" + bottom_label + ")"
    # Positional access: the labels of ``cols`` are the sheet's column names,
    # so integer keys would rely on deprecated positional fallback.
    cols.iloc[0] = "County"
    cols.iloc[2] = "total_votes"
    # Copy so the assignments below do not write into a slice of the caller's frame.
    vote_df_new = vote_df[6:].copy()
    vote_df_new.columns = cols
    # The conversion result must be stored; a bare .astype() call changes nothing.
    vote_df_new["total_votes"] = pd.to_numeric(
        vote_df_new["total_votes"], errors="coerce"
    ).astype("float64")
    vote_df_new["County"] = vote_df_new["County"].str.strip()
    return vote_df_new
# Clean each election's results sheet.
votes_2018 = process_vote_df(votes_2018_wb)
votes_2016 = process_vote_df(votes_2016_wb)
votes_2014 = process_vote_df(votes_2014_wb)
votes_2012 = process_vote_df(votes_2012_wb)
# Population estimates file; its first data row is used below as the descriptive column names.
historic_pop_estimates = pd.read_csv("data/PEP_2018_PEPANNRES_with_ann.csv")
#Replace IDs with descriptive ones
descriptive_names = list(historic_pop_estimates.iloc[0,:])
descriptive_names = [name.replace("Population Estimate (as of July 1) -", "") for name in descriptive_names]
historic_pop_estimates.columns = descriptive_names
# Match column names
historic_pop_estimates["County"] = historic_pop_estimates["Geography"].str.replace(" County, Wisconsin", "")
# Skip the description row, then reshape so each remaining column becomes a (Year, Population) row.
historic_pop_estimates_long = historic_pop_estimates[1:].melt(id_vars=["Id", "Id2", "Geography", "County", "April 1, 2010 - Census", "April 1, 2010 - Estimates Base"],
var_name="Year", value_name="Population")
historic_pop_estimates_long.head()
# Demographic table; row 0 is kept as a column-code -> label lookup and then dropped from the data.
demo = pd.read_csv("data/ACS_17_5YR_DP05_with_ann.csv")
demo_col_labels = dict(demo.loc[0])
demo = demo[1:]
# County key: the text before "County" in the display label, stripped and upper-cased.
demo['County'] = demo['GEO.display-label'].str.split(pat="County", expand=True)[0]
demo['County'] = demo['County'].str.strip()
demo['County'] = demo['County'].str.upper()
demo.head()
# Derived numeric columns; each source column is cast explicitly before use.
# NOTE(review): presumably HC03_VC99 is a percent-white figure, HC01_VC113 the eligible-voter
# count and HC01_VC03 a total population -- confirm against demo_col_labels.
demo['percent_poc'] = 100 - demo['HC03_VC99'].astype('float64')
demo['percent_eligible_vote'] = demo['HC01_VC113'].astype('int') / demo['HC01_VC03'].astype('int')
demo['median_age'] = demo['HC01_VC24'].astype('float64')
demo['voting_population'] = demo['HC01_VC113'].astype('int')
Now let's calculate the change over time.
def calculate_turnout(df, year):
    """Return votes cast divided by registered voters for ``year`` as float64.

    ``year`` is the string suffix of the ``total_votes_<year>`` and
    ``registered_voters_<year>`` columns of ``df``.
    """
    votes = df["total_votes_" + year]
    registered = df["registered_voters_" + year]
    share = votes / registered
    return share.astype('float64')
def calculate_turnout2(df, year):
    """Return turnout for ``year`` as a fraction and as that fraction times 100.

    Reads the ``total_votes_<year>`` and ``registered_voters_<year>`` columns
    of ``df``. Returns a ``(turnout, scaled_turnout)`` tuple of Series.
    """
    votes_col = "total_votes_" + year
    registered_col = "registered_voters_" + year
    turnout = (df[votes_col] / df[registered_col]).astype('float64')
    return turnout, turnout * 100
def calculate_dem_margin(df, nth):
    """Return (Democratic votes - Republican votes) / total votes as float64.

    Columns are located by name, case-insensitively: the ``nth`` column whose
    label contains "DEM", the ``nth`` containing "REP", and the ``nth``
    containing "total_votes_". ``nth`` is a zero-based position among the
    matches, in column order.

    Args:
        df: Frame holding the vote columns.
        nth: Which match to use for each of the three column kinds.

    Returns:
        A float64 Series with the margin for each row.

    Raises:
        IndexError: If fewer than ``nth + 1`` columns match any of the patterns.
    """
    # str() keeps the search working when a column label is not a string
    # (for example NaN or an integer), which would have no .upper()/.lower().
    labels = [(col, str(col)) for col in df.columns]
    dem_turnout_name = [col for col, text in labels if "DEM" in text.upper()][nth]
    rep_turnout_name = [col for col, text in labels if "REP" in text.upper()][nth]
    overall_total_name = [col for col, text in labels if "total_votes_" in text.lower()][nth]
    new_series = (df[dem_turnout_name] - df[rep_turnout_name]) / df[overall_total_name]
    return new_series.astype('float64')
def calculate_ratio(df, year1, year2):
    """Compare turnout between two years.

    Returns a ``(ratio, change)`` tuple where ``ratio`` is
    ``Turnout_<year1> / Turnout_<year2>`` and ``change`` is ``ratio - 1``
    cast to float64.
    """
    numerator = df["Turnout_" + year1]
    denominator = df["Turnout_" + year2]
    ratio = numerator / denominator
    return ratio, (ratio - 1).astype('float64')
# Midterm table: first five columns of the 2018 results + registration figures + first five
# columns of the 2014 results. Column names shared by the two result sheets (e.g. total_votes)
# receive the _2018 / _2014 suffixes in the second merge.
votes_midterm_d = votes_2018.iloc[:, 0:5].merge(registered_midterm_d, left_on="County", right_on="County")
votes_midterm_d = votes_midterm_d.merge(votes_2014.iloc[:, 0:5], left_on="County", right_on="County", suffixes=("_2018", "_2014"))
votes_midterm_d["Turnout_2018"], votes_midterm_d["Turnout_2018_scaled"] = calculate_turnout2(votes_midterm_d, "2018")
votes_midterm_d["Turnout_2014"], votes_midterm_d["Turnout_2014_scaled"] = calculate_turnout2(votes_midterm_d, "2014")
votes_midterm_d["Turnout_ratio"], votes_midterm_d["Turnout_change"] = calculate_ratio(votes_midterm_d, "2018", "2014")
# nth=0 uses the first matching DEM/REP/total columns, nth=1 the second;
# presumably these are the 2018 and 2014 sides of the merge respectively -- confirm column order.
votes_midterm_d["Democratic_margin_2018"] = calculate_dem_margin(votes_midterm_d, 0)
votes_midterm_d["Democratic_margin_2014"] = calculate_dem_margin(votes_midterm_d, 1)
#votes_midterm_d.head(15)
# Presidential table: the same construction for 2016 against 2012.
presidential_voters_d = votes_2016.iloc[:, 0:5].merge(registered_presidential_d, left_on="County", right_on="County")
presidential_voters_d = presidential_voters_d.merge(votes_2012.iloc[:, 0:5], left_on="County", right_on="County", suffixes=("_2016", "_2012"))
presidential_voters_d["Turnout_2016"], presidential_voters_d["Turnout_2016_scaled"] = calculate_turnout2(presidential_voters_d, "2016")
presidential_voters_d["Turnout_2012"], presidential_voters_d["Turnout_2012_scaled"] = calculate_turnout2(presidential_voters_d, "2012")
presidential_voters_d["Turnout_ratio"], presidential_voters_d["Turnout_change"] = calculate_ratio(presidential_voters_d, "2016", "2012")
presidential_voters_d["Democratic_margin_2016"] = calculate_dem_margin(presidential_voters_d, 0)
presidential_voters_d["Democratic_margin_2012"] = calculate_dem_margin(presidential_voters_d, 1)
#presidential_voters_d.head(15)
# Join both tables per county; columns with the same name in both (Turnout_ratio, Turnout_change)
# get the _presidential / _midterm suffixes. Then attach the demographic columns.
all_voters_d = presidential_voters_d.merge(votes_midterm_d, left_on="County", right_on="County", suffixes=("_presidential", "_midterm"))
all_voters_d_with_demo = all_voters_d.merge(demo, left_on="County", right_on="County")
def plot_var(df, col, cmap=None):
    """Draw a county map coloured by ``col`` and save it as PNG and SVG.

    ``df`` is merged onto the module-level ``dnr_counties`` frame by "County".
    The figure is written to ``turnout <timestamp> <count>.png`` and ``.svg``,
    after which the module-level ``count`` is incremented.

    Returns:
        Whatever the merged frame's ``.plot()`` call returns.
    """
    global count
    ts = datetime.datetime.now().strftime("%y_%m_%d_%H_%M")
    merged = dnr_counties.merge(df, left_on="County", right_on="County")
    fig, ax = plt.subplots(figsize=(12,10), subplot_kw={'aspect':'equal'})
    plot = merged.plot(column=col, legend=True, ax=ax, cmap=cmap, label="test")
    #ax.legend(loc=2, prop={'size': 6})
    ax.set_axis_off()
    # Both formats share one file stem, so they carry the same running index.
    stem = f"turnout {ts} {count}"
    plt.savefig(stem + ".png")
    plt.savefig(stem + ".svg")
    count = count + 1
    return plot
import datetime
# Running index used in saved figure names; plot_var and plot_scatter increment it via `global count`.
count = 0
# Minute-resolution timestamp used in the figure file names written from here on.
ts = datetime.datetime.now().strftime("%y_%m_%d_%H_%M")
def plot_scatter(df, xcol, ycol, title=None, x_title=None, y_title=None):
    """Scatter-plot ``ycol`` against ``xcol`` with an OLS fit line and save it.

    Fits ``ycol ~ const + xcol`` by ordinary least squares, draws the points
    and the fitted line, saves the figure twice under the current module-level
    ``ts`` and ``count`` (once without an extension, once as ``.svg``),
    increments ``count``, and prints R^2 and the line equation.

    Args:
        df: Frame holding both columns.
        xcol: Name of the predictor column.
        ycol: Name of the response column.
        title: Optional plot title.
        x_title: Optional x-axis label.
        y_title: Optional y-axis label.

    Returns:
        A ``(plot, results)`` tuple: the plotnine plot and the fitted
        statsmodels results object.
    """
    global count
    # Import the submodule explicitly: a bare ``import statsmodels`` does not
    # guarantee that ``statsmodels.api`` is reachable as an attribute.
    import statsmodels.api as sm

    x = sm.add_constant(df[xcol])
    y = df[ycol]
    results = sm.OLS(y, x).fit()
    #s = results.summary()
    # add_constant puts the constant first, so params come out as (intercept, slope).
    intercept, slope = results.params
    font = plotnine.themes.element_text(family="Marcellus", size=16)
    font2 = plotnine.themes.element_text(family="DejaVu Sans", size=9)
    plot = (plotnine.ggplot(df, plotnine.aes(x=xcol, y=ycol))
            + plotnine.geom_point() + plotnine.theme_light()
            #+ plotnine.geom_label(plotnine.aes(label="County"))
            + plotnine.geom_abline(slope=slope, intercept=intercept)
            + plotnine.theme(text=font, axis_text=font2))
    if title is not None:
        plot += plotnine.labels.ggtitle(title=title)
    if x_title is not None:
        plot += plotnine.scale_x_continuous(name=x_title)
    if y_title is not None:
        plot += plotnine.scale_y_continuous(name=y_title)
    # Fixed-point for ordinary magnitudes, scientific notation for very small ones.
    rsq = ("{:.4f}" if abs(results.rsquared) > .001 else "{:.4e}").format(results.rsquared)
    m = ("{:.4f}" if abs(slope) > .001 else "{:.4e}").format(slope)
    b = ("{:.4f}" if abs(intercept) > .001 else "{:.4e}").format(intercept)
    plot.save(f"turnout {ts} {count}", width=7.50, height=6, dpi=150)
    plot.save(f"turnout {ts} {count}.svg")
    count += 1
    print(f"R^2 {rsq}; y={m}x + {b}")
    return plot, results
# Map the 2014 -> 2018 turnout change; plot_var saves the untitled figure and increments count.
plot_var(votes_midterm_d, "Turnout_change",cmap='Blues')
plt.title("2018 Turnout Change", fontsize=24, fontname="Marcellus")
# Save the titled version as well, under the next index.
count += 1
plt.savefig(f"turnout {ts} {count}.png")
plt.show()
# Same map for the 2012 -> 2016 change.
plot_var(presidential_voters_d, "Turnout_change",cmap='RdYlBu')
# Outer merge keeps counties that appear on only one side; the commented-out lines
# below look like checks for such unmatched rows.
dnr_counties_presidential = dnr_counties.merge(presidential_voters_d, how="outer", left_on="County", right_on="County")
#dnr_counties_presidential[]
#[(col, dnr_counties_presidential[col].hasnans) for col in dnr_counties_presidential.columns]
#import numpy as np
#dnr_counties_presidential[dnr_counties_presidential['Turnout_change'].isna()]
votes_midterm_d["County"]
# Historic turnout workbook; the code below reads its "Year" and "General Election Turnout" columns.
historic_data = pd.read_excel("data/Voter Turnout Partisan-NonPartisan Through April 2018.xlsx")
def get_type(val):
    """Label a year "Presidential" when it is divisible by 4, otherwise "Midterm"."""
    if val % 4 == 0:
        return "Presidential"
    return "Midterm"
# Tag every row by election type based on its year.
historic_data["Type"] = historic_data["Year"].apply(get_type) # (historic_data["Year"] % 4 == 0)
historic_data
font = plotnine.themes.element_text(family="Marcellus", size=16)
font2 = plotnine.themes.element_text(family="DejaVu Sans", size=9)
# Turnout by year after 1978, one line per election type. The plot is only assigned here, not shown or saved.
plot = (plotnine.ggplot(historic_data[historic_data["Year"] > 1978], plotnine.aes(x="Year", y="General Election Turnout", color="Type"))
+ plotnine.geom_line() + plotnine.theme_light()
# + plotnine.facet_wrap(facets="Type")
+ plotnine.theme(text=font, axis_text=font2))
#2018 turnout using nonpartisan voting age population figure
voting_age_population_2018 = 4469475
turnout_2018 = all_voters_d["total_votes_2018"].sum() / voting_age_population_2018
# One extra row in the same column layout as historic_data (including the "Type" column).
new_row = pd.DataFrame([["2018", voting_age_population_2018, None, turnout_2018, None, None, None, None, "Midterm"]], columns=historic_data.columns)
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
historic_data_with2018 = pd.concat([historic_data, new_row])
historic_data_with2018
# The new row's year was entered as a string, so normalise the whole column to integers.
historic_data_with2018["Year"] = historic_data_with2018["Year"].astype('int32')
historic_data_with2018["General Election Turnout Scaled"] = historic_data_with2018["General Election Turnout"] * 100
# Turnout (scaled by 100) by year, one line per election type, including the added 2018 row.
plot = (plotnine.ggplot(historic_data_with2018, #[historic_data_with2018["Year"] > 1978],
plotnine.aes(x="Year", y="General Election Turnout Scaled", color="Type"))
+ plotnine.geom_line() + plotnine.theme_light()
# + plotnine.facet_wrap(facets="Type")
+ plotnine.labels.ggtitle(title="Turnout over time")
+ plotnine.scale_y_continuous(name="General Election Turnout")
+ plotnine.theme(text=font, axis_text=font2, legend_text=font2))
# Saved under the current count; count is not incremented here.
plot.save(f"turnout {ts} {count}", width=7.50, height=6, dpi=150)
plot
pd.Series.astype?
font = plotnine.themes.element_text(family="Marcellus", size=16)
font2 = plotnine.themes.element_text(family="DejaVu Sans", size=9)
# Quick look: histogram of county-level 2018 turnout.
(plotnine.ggplot(all_voters_d, plotnine.aes(x="Turnout_2018"))
+ plotnine.geom_histogram(color="grey",fill="none") + plotnine.theme_light()
+ plotnine.theme(text=font, axis_text=font2))
#all_voters_d_with2018
#from matplotlib import pyplot
# The same histogram with a title, axis labels and no grid lines, saved as PNG.
plot = (plotnine.ggplot(all_voters_d, plotnine.aes(x="Turnout_2018"))
+ plotnine.geom_histogram(color="grey",fill="none") + plotnine.theme_light()
+ plotnine.labels.ggtitle(title="Turnout by Number of Counties")
+ plotnine.scale_y_continuous(name="Number of Counties")
+ plotnine.scale_x_continuous(name="2018 Turnout")
+ plotnine.theme(text=font, axis_text=font2, panel_grid_major=plotnine.element_blank(),
panel_grid_minor=plotnine.element_blank()))
count += 1
plot.save(f"turnout {ts} {count}.png")
plot
# County map of 2018 turnout; the titled version is saved under the next index.
plot_var(all_voters_d, "Turnout_2018")
plt.title("2018 Turnout", fontsize=24, fontname="Marcellus")
count += 1
plt.savefig(f"turnout {ts} {count}.png")
plt.show()
# Split 2018 turnout into 10 equal-width bins, then count counties and sum voting_population per bin.
all_voters_d_with_demo["Turnout_2018_bin"] =pd.cut(all_voters_d_with_demo["Turnout_2018"], 10)
all_voters_d_with_demo.groupby("Turnout_2018_bin").agg({'County': pd.Series.count, 'voting_population': sum})
# Statewide figures: 2014 votes over 2014 registrations, 2014 vote total, and 2018 votes over 4469475.
all_voters_d["total_votes_2014"].sum() /all_voters_d["registered_voters_2014"].sum()
all_voters_d["total_votes_2014"].sum()
all_voters_d["total_votes_2018"].sum() / 4469475 #/ all_voters_d_with_demo["HC01_VC113"].sum()
# Quick look: midterm turnout change against the 2018 Democratic margin.
(plotnine.ggplot(votes_midterm_d, plotnine.aes(x="Turnout_change", y="Democratic_margin_2018"))
+ plotnine.geom_point() + plotnine.theme_light()
#+ plotnine.geom_label(plotnine.aes(label="County"))
)
# Saved version with an OLS line; note the axes are swapped relative to the quick look above.
plot, summary = plot_scatter(all_voters_d_with_demo,
'Democratic_margin_2018', "Turnout_change_midterm",
title="Turnout change compared to Democratic margin",
x_title="Democratic margin in 2018",
y_title="Turnout change between 2014 and 2018"
)
plot
# The same pair of plots for the presidential cycle.
(plotnine.ggplot(presidential_voters_d, plotnine.aes(x="Turnout_change", y="Democratic_margin_2016"))
+ plotnine.geom_point() + plotnine.theme_light()
#+ plotnine.geom_label(plotnine.aes(label="County"))
)
plot, summary = plot_scatter(all_voters_d_with_demo,
'Democratic_margin_2016', "Turnout_change_presidential",
title="Turnout change in 2016 compared to Democratic margin",
x_title='Democratic margin 2016', y_title="Turnout change")
plot
# Presidential-cycle turnout change against midterm-cycle turnout change, per county.
(plotnine.ggplot(all_voters_d, plotnine.aes(x="Turnout_change_presidential", y="Turnout_change_midterm"))
+ plotnine.geom_point() + plotnine.theme_light()
#+ plotnine.geom_label(plotnine.aes(label="County"))
)
It looks like there's not really a relationship between the turnout change in the presidential cycle (2012 to 2016) and the turnout change in the midterm cycle (2014 to 2018) — in other words, the counties where turnout was particularly low in 2016 didn't necessarily see lower turnout in 2018 (or, more precisely, a smaller increase). E.g., turnout dropped significantly in Milwaukee in 2016 (compared to 2012), but it increased by a typical amount in 2018.
How closely do eligible voters track registered voters?
# Cast both columns to integers before regressing one on the other.
all_voters_d_with_demo["registered_voters_2016"] = all_voters_d_with_demo["registered_voters_2016"].astype('int')
all_voters_d_with_demo['HC01_VC113'] = all_voters_d_with_demo['HC01_VC113'].astype('int')
plot, summary = plot_scatter(all_voters_d_with_demo, "registered_voters_2016", 'HC01_VC113' )
plot
plot, summary = plot_scatter(all_voters_d_with_demo, "registered_voters_2018", 'HC01_VC113' )
plot
all_voters_d_with_demo.head()
# Midterm turnout change against percent people of color.
plot, summary = plot_scatter(all_voters_d_with_demo, "percent_poc",
"Turnout_change_midterm",
title="Turnout compared to percent people of color by county",
x_title="Percent people of color",
y_title="Turnout change between 2014 and 2018")
plot
# Turnout changes multiplied by 100 for plotting.
all_voters_d_with_demo["Turnout_change_presidential_scaled"] = all_voters_d_with_demo["Turnout_change_presidential"]*100
all_voters_d_with_demo["Turnout_change_midterm_scaled"] = all_voters_d_with_demo["Turnout_change_midterm"]*100
plot, summary = plot_scatter(all_voters_d_with_demo,"Turnout_change_presidential_scaled", "Turnout_change_midterm_scaled",
title="Turnout change since last presidential versus last midterm")
plot
# Repeat the percent-POC comparison with counties at or above 30 percent removed.
all_voters_d_demo_no_outliers = all_voters_d_with_demo[all_voters_d_with_demo["percent_poc"] < 30]
plot, summary = plot_scatter(all_voters_d_demo_no_outliers, "percent_poc",
"Turnout_change_midterm_scaled",
title="Turnout change compared to percent people of color by county",
x_title="Percent people of color",
y_title="Turnout change, 2014 to 2018\n(percentage points)")
plot
#all_voters_d_demo_no_outliers = all_voters_d_with_demo[all_voters_d_with_demo["HC03_VC55"] < 30]
# The demographic columns are read as text (the file's first row held labels), so
# convert HC03_VC55 to float before regressing on it or comparing it with a number.
all_voters_d_with_demo["HC03_VC55"] = all_voters_d_with_demo["HC03_VC55"].astype('float64')
plot, summary = plot_scatter(all_voters_d_with_demo, "HC03_VC55",
"Turnout_change_midterm_scaled",
title="Turnout change compared to Black resident proportion",
x_title="Percent of residents who are Black",
y_title="Turnout change, 2014 to 2018\n(percentage points)")
plot
# Same comparison restricted to counties where the value is below 10.
all_voters_d_demo_no_outliers = all_voters_d_with_demo[all_voters_d_with_demo["HC03_VC55"] < 10]
plot, summary = plot_scatter(all_voters_d_demo_no_outliers, "HC03_VC55",
"Turnout_change_midterm_scaled",
title="Turnout change compared to Black resident proportion",
x_title="Percent of residents who are Black",
y_title="Turnout change, 2014 to 2018\n(percentage points)")
plot
# Turnout level (not change) against percent people of color, one scatter per election.
plot, summary = plot_scatter(all_voters_d_with_demo, "percent_poc",
"Turnout_2018_scaled",
title="Turnout compared to percent people of color by county",
x_title="Percent people of color",
y_title="Turnout 2018")
plot
# List the turnout-related column names available on the merged frame.
[col for col in all_voters_d_with_demo.columns if "turnout" in col.lower()]
plot, summary = plot_scatter(all_voters_d_with_demo, "percent_poc",
"Turnout_2016_scaled",
title="Turnout compared to percent people of color by county",
x_title="Percent people of color",
y_title="Turnout 2016")
plot
plot, summary = plot_scatter(all_voters_d_with_demo, "Democratic_margin_2016",
"Turnout_2016",
title="Turnout compared to Democratic margin by county",
x_title="Democratic margin",
y_title="Turnout 2016")
plot
plot, summary = plot_scatter(all_voters_d_with_demo, "percent_poc",
"Turnout_2012_scaled",
title="Turnout compared to percent people of color by county",
x_title="Percent people of color",
y_title="Turnout 2012")
plot
# Long format: one row per (county, election year) so the four years can be faceted.
turnout_long = all_voters_d_with_demo.melt(id_vars=["County", "percent_poc"], value_vars=["Turnout_2018", "Turnout_2016", "Turnout_2014", "Turnout_2012"], var_name="Year", value_name="Turnout")
turnout_long["Turnout"] = turnout_long["Turnout"].astype("float64") * 100
# Strip the "Turnout_" prefix so the Year value is just the year.
turnout_long['Year'] = turnout_long["Year"].str.replace("Turnout_","")
font_main = plotnine.themes.element_text(family="Marcellus", size=16)
font_sans = plotnine.themes.element_text(family="DejaVu Sans", size=9)
font_sans_small = plotnine.themes.element_text(family="DejaVu Sans", size=7)
# One panel per year, each with a linear smoother.
plot = (plotnine.ggplot(turnout_long, plotnine.aes(x="percent_poc", y="Turnout"))
+ plotnine.geom_point(shape=".") + plotnine.facet_wrap(facets="Year")
+ plotnine.geom_smooth(method="lm") + plotnine.theme_light()
+ plotnine.theme(text=font_main, axis_text=font_sans, strip_text=font_sans))
plot += plotnine.labels.ggtitle(title="Turnout by percentage people of color")
plot += plotnine.scale_x_continuous(name="Percent people of color")
plot += plotnine.scale_y_continuous(name="Turnout")
plot.save(f"turnout {ts} {count}", width=7.50, height=6, dpi=150)
plot.save(f"turnout {ts} {count}.svg")
count += 1
plot
#HC03_VC54 White ; HC03_VC55 Black; HC03_VC56 Native American; HC03_VC61 Asian American; HC03_VC75 Multiracial; HC03_VC93 Hispanic/Latinx
# First melt: one row per (county, election year), carrying the six race columns along as ids.
turnout_long = all_voters_d_with_demo.melt(id_vars=["County", "percent_poc", "HC03_VC54", "HC03_VC55", "HC03_VC56", "HC03_VC61", "HC03_VC75", "HC03_VC93"],
value_vars=["Turnout_2018", "Turnout_2016", "Turnout_2014", "Turnout_2012"],
var_name="Year", value_name="Turnout")
# Give each coded column a readable name (copy under the new name, then drop the code).
turnout_long["White"] = turnout_long["HC03_VC54"]
del turnout_long["HC03_VC54"]
turnout_long["Black"] = turnout_long["HC03_VC55"]
del turnout_long["HC03_VC55"]
turnout_long["Native American"] = turnout_long["HC03_VC56"]
del turnout_long["HC03_VC56"]
turnout_long["Asian American"] = turnout_long["HC03_VC61"]
del turnout_long["HC03_VC61"]
turnout_long["Multiracial"] = turnout_long["HC03_VC75"]
del turnout_long["HC03_VC75"]
turnout_long["Hispanic/Latinx"] = turnout_long["HC03_VC93"]
del turnout_long["HC03_VC93"]
# Second melt: the six renamed columns become (Race, Percent) rows.
turnout_long = turnout_long.melt(id_vars=["County", "Year", "percent_poc", "Turnout"], var_name="Race", value_name="Percent")
turnout_long.head()
turnout_long["Turnout"] = turnout_long["Turnout"].astype("float64") * 100
turnout_long["Percent"] = turnout_long["Percent"].astype("float64")
turnout_long['Year'] = turnout_long["Year"].str.replace("Turnout_","")
# Year x Race grid of 2-D bins with a linear smoother in every panel.
plot = (plotnine.ggplot(turnout_long, plotnine.aes(x="Percent", y="Turnout"))
+ plotnine.geom_bin2d() + plotnine.facet_grid(facets=["Year","Race"])
+ plotnine.geom_smooth(method="lm") + plotnine.theme_light()
+ plotnine.theme(text=font_main, axis_text=font_sans, strip_text=font_sans_small, strip_text_y=font_sans))
plot += plotnine.labels.ggtitle(title="Turnout by race and year")
plot += plotnine.scale_x_continuous(name="Race's proportion of residents")
plot += plotnine.scale_y_continuous(name="Turnout rate")
#plot.save(f"turnout {ts} {count}", width=7.50, height=6, dpi=150)
#plot.save(f"turnout {ts} {count}.svg")
plot
This is a bit hard to read because, for several racial groups, the counties all fall within a ten-percentage-point range. Let's allow the x axes to vary by race.
# Add points, let each Race column have its own x scale, and hide the legend.
plot = plot + plotnine.geom_point(shape=".") + plotnine.facet_grid(facets=["Year","Race"], scales="free_x") + plotnine.theme(legend_position = "none")
plot.save(f"turnout {ts} {count}", width=7.50, height=6, dpi=150)
plot.save(f"turnout {ts} {count}.svg")
count +=1
plot
# Cast HC03_VC55 to float before using it as the regressor.
all_voters_d_with_demo["HC03_VC55"] = all_voters_d_with_demo["HC03_VC55"].astype('float64')
plot, summary = plot_scatter(all_voters_d_with_demo, "HC03_VC55",
"Turnout_2018_scaled",
title="Turnout compared to county's proportion of residents who are Black",
x_title="Proportion of Black residents",
y_title="2018 Turnout")
plot
# OLS: midterm turnout change on percent POC and the 2018 Democratic margin.
# NOTE(review): statsmodels.api is used here, but only `statsmodels` and
# `statsmodels.formula.api` are imported at the top of the file.
x = all_voters_d_with_demo[["percent_poc", "Democratic_margin_2018"]]
y = all_voters_d_with_demo["Turnout_change_midterm"]
x = statsmodels.api.add_constant(x)
model = statsmodels.api.OLS(y, x)
results = model.fit()
results.summary()
# Same model with the presidential-cycle turnout change added as a predictor.
x = all_voters_d_with_demo[["percent_poc", "Democratic_margin_2018", "Turnout_change_presidential"]]
y = all_voters_d_with_demo["Turnout_change_midterm"]
x = statsmodels.api.add_constant(x)
model = statsmodels.api.OLS(y, x)
results = model.fit()
results.summary()
# OLS: 2018 Democratic margin on percent POC and the 2016 margin.
x = all_voters_d_with_demo[["percent_poc","Democratic_margin_2016" ]]
y = all_voters_d_with_demo["Democratic_margin_2018"]
x = statsmodels.api.add_constant(x)
model = statsmodels.api.OLS(y, x)
results = model.fit()
results.summary()
# Formula interface: presidential turnout change on the 2016 margin and percent POC.
model = smf.ols(formula="Turnout_change_presidential ~ Democratic_margin_2016 + percent_poc", data=all_voters_d_with_demo)
results = model.fit()
results.summary()
# Democratic margin in 2018 against the two preceding elections.
plot, summary = plot_scatter(all_voters_d_with_demo,
'Democratic_margin_2016', "Democratic_margin_2018",
title="Democratic margin in 2018 vs. 2016"
)
plot
plot, summary = plot_scatter(all_voters_d_with_demo,
'Democratic_margin_2014', "Democratic_margin_2018",
title="Democratic margin in 2018 vs. 2014"
)
plot
# Midterm-to-midterm turnout comparison. The columns plotted are 2018 and 2014,
# so the title names 2014 (it previously said 2016).
plot, summary = plot_scatter(all_voters_d_with_demo,
'Turnout_2018', "Turnout_2014",
title="Turnout in 2018 vs. 2014"
)
plot
# Presidential-to-presidential turnout comparison.
plot, summary = plot_scatter(all_voters_d_with_demo,
'Turnout_2012', "Turnout_2016",
title="Turnout in 2016 vs. 2012"
)
plot
# Formula model with median age added as a predictor.
model = smf.ols(formula="Turnout_change_presidential ~ Democratic_margin_2016 + median_age + percent_poc ", data=all_voters_d_with_demo)
results = model.fit()
results.summary()
# Rescale percent POC to a 0-1 proportion, then fit the model with all interactions
# between margin, median age and that proportion (the `*` formula operator).
all_voters_d_with_demo["proportion_poc"] = all_voters_d_with_demo["percent_poc"]/100
model = smf.ols(formula="Turnout_change_presidential ~ (Democratic_margin_2016) * median_age * proportion_poc ", data=all_voters_d_with_demo)
results = model.fit()
results.summary()