# 使用Datashader可视化地理信息

Datashader is part of the PyViz initiative for making Python-based visualization tools work well together.

!conda install datashader

PyViz

http://pyviz.org/

## US Census

import datashader as ds
import datashader.transfer_functions as tf
import dask.dataframe as dd
import numpy as np
# Load the census points through dask's public Parquet reader
# (dd.read_parquet) instead of reaching into the dd.io.parquet submodule.
df = dd.read_parquet('/Users/datalab/bigdata/census.snappy.parq')
# persist() asks dask to compute the frame once and keep the result, so the
# aggregations further down do not have to re-read the file.
df = df.persist()
df.head()
easting northing race
0 -13700737.0 6275190.0 w
1 -13700711.0 6275195.0 w
2 -13702081.0 6274898.5 w
3 -13701948.0 6274931.0 w
4 -13701793.0 6275088.5 w
# Named bounding boxes, each laid out as
# ((longitude_min, longitude_max), (latitude_min, latitude_max)).
USA = ((-124.72, -66.95), (23.55, 50.06))
LakeMichigan = ((-91.68, -83.97), (40.75, 44.08))
Chicago = ((-88.29, -87.30), (41.57, 42.00))
Chinatown = ((-87.67, -87.63), (41.84, 41.86))
NewYorkCity = ((-74.39, -73.44), (40.51, 40.91))
LosAngeles = ((-118.53, -117.81), (33.63, 33.96))
Houston = ((-96.05, -94.68), (29.45, 30.11))
Austin = ((-97.91, -97.52), (30.17, 30.37))
NewOrleans = ((-90.37, -89.89), (29.82, 30.05))
Atlanta = ((-84.88, -84.04), (33.45, 33.84))

from datashader.utils import lnglat_to_meters as webm
# Project the USA longitude/latitude box with webm (lnglat_to_meters) and
# keep each projected range as a plain two-element list.
x_range, y_range = (list(bounds) for bounds in webm(*USA))

# Output size: 900 wide with a 12:7 width-to-height ratio.
plot_width = 900
plot_height = int(plot_width * 7.0 / 12)

background = "black"
from functools import partial
from datashader.utils import export_image
from datashader.colors import colormap_select, Greys9
# display() comes from the public IPython.display module; importing it from
# IPython.core.display is deprecated.
from IPython.display import HTML, display

# export(img, name): export_image with the background and the "export"
# output path already filled in.
export = partial(export_image, background=background, export_path="export")
# cm(palette, ...): colormap_select, reversed whenever the background is
# anything other than black.
cm = partial(colormap_select, reverse=(background != "black"))

# Widen the notebook's .container element to the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))
# Aggregate the census points over the projected USA extent (no reduction is
# passed to points(), so datashader's default aggregate is used).
usa_x_bounds, usa_y_bounds = webm(*USA)
cvs = ds.Canvas(
    plot_width=plot_width,
    plot_height=plot_height,
    x_range=usa_x_bounds,
    y_range=usa_y_bounds,
)
agg = cvs.points(df, 'easting', 'northing')

# Grey ramp, log-scaled.
grey_log_img = tf.shade(agg, cmap=cm(Greys9, 0.2), how='log')
export(grey_log_img, "census_gray_linear")

# colorcet 'fire' ramp, histogram-equalized.
from colorcet import fire
fire_img = tf.shade(agg, cmap=cm(fire, 0.2), how='eq_hist')
export(fire_img, "census_ds_fire_eq_hist")

# 'viridis' ramp, histogram-equalized.
from datashader.colors import viridis
viridis_img = tf.shade(agg, cmap=cm(viridis), how='eq_hist')
export(viridis_img, "census_viridis_eq_hist")
# Color assigned to each 'race' category code: one palette for a black
# background, another for any other background.
color_key = (
    {'w': 'aqua', 'b': 'lime', 'a': 'red', 'h': 'fuchsia', 'o': 'yellow'}
    if background == "black"
    else {'w': 'blue', 'b': 'green', 'a': 'red', 'h': 'orange', 'o': 'saddlebrown'}
)
def create_image(longitude_range, latitude_range, w=plot_width, h=plot_height):
    """Shade the census points inside a longitude/latitude box, colored by race.

    Args:
        longitude_range: Longitude bounds of the view; projected with ``webm``.
        latitude_range: Latitude bounds of the view; projected with ``webm``.
        w: Canvas ``plot_width``; defaults to the module-level ``plot_width``.
        h: Canvas ``plot_height``; defaults to the module-level ``plot_height``.

    Returns:
        The result of ``tf.shade`` on the per-race counts, using the
        module-level ``color_key`` and ``eq_hist`` scaling.
    """
    easting_bounds, northing_bounds = webm(longitude_range, latitude_range)
    canvas = ds.Canvas(
        plot_width=w,
        plot_height=h,
        x_range=easting_bounds,
        y_range=northing_bounds,
    )
    # One count per 'race' category for every pixel.
    per_race_counts = canvas.points(df, 'easting', 'northing', ds.count_cat('race'))
    return tf.shade(per_race_counts, color_key=color_key, how='eq_hist')

# Race-colored maps for the whole country and for New York City.
usa_img = create_image(*USA)
export(usa_img, "Zoom 0 - USA")
nyc_img = create_image(*NewYorkCity)
export(nyc_img, "NYC")

# No x/y ranges are passed here, so the canvas extent is left to datashader.
cvs = ds.Canvas(plot_height=plot_height, plot_width=plot_width)
aggc = cvs.points(df, 'easting', 'northing', ds.count_cat('race'))

# Shade only the 'b' category.
b_counts = aggc.sel(race='b')
export(tf.shade(b_counts, cmap=cm(Greys9, 0.25), how='eq_hist'), "USA blacks")

# Keep only the pixels where all four listed categories have a nonzero count;
# every other pixel is filled with 0.
has_all_four = (aggc.sel(race=['w', 'b', 'a', 'h']) > 0).all(dim='race')
agg2 = aggc.where(has_all_four).fillna(0)
export(tf.shade(agg2, color_key=color_key, how='eq_hist'), "USA all")

## NYC Crime

# Data source:
# https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Historic/qgea-i56i
import pandas as pd

complaint_csv = '/Users/datalab/bigdata/NYPD_Complaint_Data_Historic.csv'
complaint_columns = ['Latitude', 'Longitude', 'SUSP_SEX', 'SUSP_RACE', 'OFNS_DESC']
# Only the columns above are read. For a quick trial run, nrows=1000 can be
# passed to read_csv as well.
df = pd.read_csv(complaint_csv, usecols=complaint_columns)
df.head()
OFNS_DESC SUSP_RACE SUSP_SEX Latitude Longitude
0 PETIT LARCENY NaN NaN 40.616758 -73.963143
1 ROBBERY WHITE HISPANIC M 40.747944 -73.854781
2 HARRASSMENT 2 BLACK U 40.576995 -73.981524
3 DANGEROUS DRUGS WHITE M 40.607195 -74.148564
4 GRAND LARCENY UNKNOWN M 40.802294 -73.945280
from datashader.utils import lnglat_to_meters as webm

# Project longitude/latitude with webm (lnglat_to_meters) into the new
# 'Lon'/'Lat' columns. The Series are passed directly, so no intermediate
# Python lists are built for the full column length.
df['Lon'], df['Lat'] = webm(df['Longitude'], df['Latitude'])
df.head()
OFNS_DESC SUSP_RACE SUSP_SEX Latitude Longitude Lon Lat
0 PETIT LARCENY NaN NaN 40.616758 -73.963143 -8.233539e+06 4.955977e+06
1 ROBBERY WHITE HISPANIC M 40.747944 -73.854781 -8.221477e+06 4.975234e+06
2 HARRASSMENT 2 BLACK U 40.576995 -73.981524 -8.235586e+06 4.950147e+06
3 DANGEROUS DRUGS WHITE M 40.607195 -74.148564 -8.254180e+06 4.954575e+06
4 GRAND LARCENY UNKNOWN M 40.802294 -73.945280 -8.231551e+06 4.983224e+06
# Show the column labels of the loaded frame.
# NOTE(review): the output recorded below lists far more columns than the
# five requested through usecols plus the derived Lon/Lat columns; it looks
# like stale output from an earlier run. Confirm and re-run.
df.columns
Index(['CMPLNT_NUM', 'CMPLNT_FR_DT', 'CMPLNT_FR_TM', 'CMPLNT_TO_DT',
       'CMPLNT_TO_TM', 'ADDR_PCT_CD', 'RPT_DT', 'KY_CD', 'OFNS_DESC', 'PD_CD',
       'PD_DESC', 'CRM_ATPT_CPTD_CD', 'LAW_CAT_CD', 'BORO_NM',
       'LOC_OF_OCCUR_DESC', 'PREM_TYP_DESC', 'JURIS_DESC', 'JURISDICTION_CODE',
       'PARKS_NM', 'HADEVELOPT', 'HOUSING_PSA', 'X_COORD_CD', 'Y_COORD_CD',
       'SUSP_AGE_GROUP', 'SUSP_RACE', 'SUSP_SEX', 'TRANSIT_DISTRICT',
       'Latitude', 'Longitude', 'Lat_Lon', 'PATROL_BORO', 'STATION_NAME',
       'VIC_AGE_GROUP', 'VIC_RACE', 'VIC_SEX'],
      dtype='object')
# Preview the first rows, now including the derived Lon/Lat columns.
df.head()
OFNS_DESC SUSP_RACE SUSP_SEX Latitude Longitude Lon Lat
0 PETIT LARCENY NaN NaN 40.616758 -73.963143 -8.233539e+06 4.955977e+06
1 ROBBERY WHITE HISPANIC M 40.747944 -73.854781 -8.221477e+06 4.975234e+06
2 HARRASSMENT 2 BLACK U 40.576995 -73.981524 -8.235586e+06 4.950147e+06
3 DANGEROUS DRUGS WHITE M 40.607195 -74.148564 -8.254180e+06 4.954575e+06
4 GRAND LARCENY UNKNOWN M 40.802294 -73.945280 -8.231551e+06 4.983224e+06
# Number of rows for each SUSP_SEX value.
df.groupby('SUSP_SEX').size()
SUSP_SEX
F     576490
M    1784627
U     438633
dtype: int64
# Number of rows for each SUSP_RACE value.
df.groupby('SUSP_RACE').size()
SUSP_RACE
AMERICAN INDIAN/ALASKAN NATIVE       9036
ASIAN / PACIFIC ISLANDER            89136
BLACK                             1093935
BLACK HISPANIC                     149002
OTHER                                  11
UNKNOWN                            764617
WHITE                              330730
WHITE HISPANIC                     496597
dtype: int64
import datashader as ds
from datashader.utils import export_image
from datashader.colors import colormap_select, Greys9, Hot, inferno
import datashader.transfer_functions as tf
# Datashader rendering setup, following http://datashader.org/topics/census.html
background = 'black'
# cm(palette, ...): colormap_select, reversed unless the background is black.
cm = partial(colormap_select, reverse=(background != "black"))
# export(img, name): export_image with the "export" output path and the
# chosen background already filled in.
export = partial(export_image, export_path="export", background=background)
# Disabled: a color key for the SUSP_SEX codes and a categorical cast of that
# column. These would go with the ds.count_cat('SUSP_SEX') aggregate that is
# also commented out where the points are aggregated.
# color_key = {'F':'white', 'M':'yellow',  'U':'red'}
# df['SUSP_SEX'] = df['SUSP_SEX'].astype('category')
# Create function to re-generate canvas, grid, and map based on data category provided
from datashader.utils import lnglat_to_meters as webm

# New York City bounding box: ((lon_min, lon_max), (lat_min, lat_max)).
NewYorkCity = ((-74.39, -73.44), (40.51, 40.91))
# Project the box once with webm (lnglat_to_meters) and reuse the resulting
# ranges for the canvas.
x_range, y_range = [list(r) for r in webm(*NewYorkCity)]

plot_width = 900
plot_height = int(plot_width * 7.0 / 12)

cvs = ds.Canvas(plot_width, plot_height, x_range, y_range)
# Default aggregate over the projected points; the per-category
# ds.count_cat('SUSP_SEX') aggregate is intentionally left out.
agg = cvs.points(df, 'Lon', 'Lat')
# NOTE(review): "census_gray_linear" is also the file name used by the census
# export earlier in this file, so this call overwrites that image when both
# run. Confirm whether a distinct name is wanted.
export(tf.shade(agg, cmap=cm(Greys9, 0.25), how='log'), "census_gray_linear")
from datashader.colors import viridis

# Show map with 'viridis' color map, histogram-equalized.
export(tf.shade(agg, cmap=cm(viridis, 0.1), how='eq_hist'), "export")
from colorcet import fire
# Same aggregate with colorcet's 'fire' color map.
# NOTE(review): "census_ds_fire_eq_hist" is also the file name used by the
# census export earlier in this file, so this call overwrites that image when
# both run. Confirm whether a distinct name is wanted.
export(tf.shade(agg, cmap=cm(fire, 0.2), how='eq_hist'), "census_ds_fire_eq_hist")