Problem with missing values in Altair or Plotly choropleth map

907 views Asked by At

I have data for a few states in the US; the values for the others are null. While creating the maps, I would like to shade in the states with the missing value in the text, but I am struggling to find the correct method. With my current code, I am unable to get the entire US map including the states with the null value; only the states that have an assigned value show up. I have also looked at previously posted questions and tried layering the maps, but that gives me an error. Here's what cc_df looks like:

Here's my code:

# import the required library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt# import seaborn library
%matplotlib inline
import altair as alt
from vega_datasets import data

# State database.
# The Census reference file is pipe-separated. All columns are read as
# strings (FIPS are best as strings), the file's own header row is replaced
# by the names below, and the last column (StateENS) is not loaded.
states_df = pd.read_csv(
    'https://www2.census.gov/geo/docs/reference/state.txt',
    sep="|",
    dtype='str',
    header=0,
    names=['state_fips', 'state', 'state_name', 'StateENS'],
    usecols=['state_fips', 'state_name', 'state'],
)
states_df = states_df.set_index('state')
# Integer form of the FIPS code; this is the key the map lookup below uses.
states_df['id'] = states_df['state_fips'].astype(int)

# The data to map: one mean CWAD value per state.
cc_df = pd.read_csv('hv_cwad.csv', usecols=['state', 'CWAD']).groupby('state').mean()

# Combine state database and our own data. The join is driven by states_df,
# so states that have no row in cc_df are kept, with a missing CWAD.
#cc_state_df = pd.merge(cc_df, states_df)
cc_state_df = states_df.join(cc_df)
cc_state_df = cc_state_df.reset_index()

# %%
# Create the map
states = alt.topo_feature(data.us_10m.url, 'states')
variable_list = ['CWAD']

# Colour channel: whichever variable the current row of the repeat refers to.
color_channel = alt.Color(alt.repeat('row'), type='quantitative')

(
    alt.Chart(states)
    .mark_geoshape(stroke='lightgrey', strokeWidth=.5)
    .encode(color_channel)
    # Attach the columns in variable_list to each state shape via its id.
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(cc_state_df, 'id', variable_list),
    )
    .properties(width=300, height=300)
    .project(type='albersUsa')
    # One map per variable, each with its own colour scale.
    .repeat(row=variable_list)
    .resolve_scale(color='independent')
)

The output looks like this:

output with the current code

2

There are 2 answers

3
r-beginners On BEST ANSWER

I was aware that the points you raised were issues, so I did some research on handling NaN values and found the following answers. However, the conditional test on the null value did not work, so I replaced the missing values with -1 to get the desired output.

import pandas as pd
import matplotlib.pyplot as plt
import altair as alt
from vega_datasets import data

import io

# State database: pipe-separated Census reference file. All columns are read
# as strings, the file's header row is replaced by the names below, and the
# StateENS column is not loaded.
states_df = pd.read_csv('https://www2.census.gov/geo/docs/reference/state.txt',
                        sep="|",
                        dtype='str',
                        header=0,
                        names=['state_fips', 'state', 'state_name', 'StateENS'],
                        usecols=['state_fips', 'state_name', 'state']).set_index('state')

# Integer form of the FIPS code; this is the key the map lookup below uses.
states_df['id'] = states_df['state_fips'].astype(int)

# Inline sample of the data to map.
# NOTE: this string must not be called `data` -- that name is the
# vega_datasets module, which is still needed for `data.us_10m.url` below.
sample_csv = '''
state CWAD
AR 377.715148
FL 6560.929494
GA 1958.122132
IA 0.409179
KS 63.706671 
'''

# sep=r'\s+' splits on runs of whitespace; it replaces delim_whitespace=True,
# which is deprecated in recent pandas releases.
cc_df = pd.read_csv(io.StringIO(sample_csv), sep=r'\s+')
# The data to map
#cc_df = pd.read_csv('hv_cwad.csv', usecols=['state', 'CWAD'])
cc_df = cc_df.groupby('state').mean()

# Combine state database and our own data. The join is driven by states_df,
# so states without a CWAD value are kept with a missing value ...
#cc_state_df = pd.merge(cc_df, states_df)
cc_state_df = states_df.join(cc_df).reset_index()
# ... which is then replaced by the sentinel -1 that the colour condition
# below tests for.
cc_state_df.fillna(-1, inplace=True)

# Create the map
states = alt.topo_feature(data.us_10m.url, 'states')

variable_list = ['CWAD']

alt.Chart(states).mark_geoshape(
    stroke='lightgrey',
    strokeWidth=.5
).encode(
    # States carrying the sentinel -1 (no data) are drawn in light gray;
    # all others are coloured by their CWAD value.
    color=alt.condition('datum.CWAD !== -1', 'CWAD:Q', alt.value('lightgray'))
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(
        cc_state_df,
        'id',
        variable_list)
).properties(
    width=300,
    height=300
).project(
    type='albersUsa'
).repeat(
    row=variable_list
).resolve_scale(
    color='independent'
)

enter image description here

0
nchatt On

I was able to plot the missing data using Plotly instead of Altair. In case you are not particular about using Altair, this may be helpful.

# import the required library
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import altair as alt
from vega_datasets import data

# State database: pipe-separated Census reference file. All columns are read
# as strings (FIPS are best as strings), the file's header row is replaced by
# the names below, and the last column (StateENS) is not loaded.
states_df = pd.read_csv(
    'https://www2.census.gov/geo/docs/reference/state.txt',
    sep="|",
    dtype='str',
    header=0,
    names=['state_fips', 'state', 'state_name', 'StateENS'],
    usecols=['state_fips', 'state_name', 'state'],
).set_index('state')
states_df['id'] = states_df['state_fips'].astype(int)

# The data to map: one mean CWAD value per state.
# The column loaded here must be the same one plotted below ('CWAD');
# loading 'GWAD' instead makes cc_state_df['CWAD'] raise a KeyError.
cc_df = pd.read_csv('cwad_hv.csv',
                    usecols=['state', 'CWAD'])
cc_df = cc_df.groupby('state').mean()

# Combine state database and our own data. The join is driven by states_df,
# so states without data are kept with a missing CWAD value.
#cc_state_df = pd.merge(cc_df, states_df) #don't use this.
cc_state_df = states_df.join(cc_df).reset_index()
#cc_state_df.fillna(0, inplace=True)#This changes the states with no data from NA to zero. If your data has a range -ve to +ve, skip this.

fig = go.Figure(data=go.Choropleth(
    # Two-letter state codes, matched via locationmode='USA-states'.
    locations=cc_state_df['state'],
    z=cc_state_df['CWAD'].astype(float),
    locationmode='USA-states',
    #color='Greens',
    autocolorscale=True,
    #range_color=[0, 6500],
    #text=df['text'], # hover text
    marker_line_color='black', # line markers between states
    colorbar_title="CWAD kg/ha"
))

fig.update_layout(
    title_text='CWAD',
    geo = dict(
        scope='usa',
        projection=go.layout.geo.Projection(type = 'albers usa'),
        showlakes=False, # lakes
        lakecolor='rgb(255, 255, 255)'),
)

fig.show()

Here is the output