In [None]:
!curl --location --remote-name --remote-header-name \
    http://ftp.eea.europa.eu/www/eprtr/v17/E-PRTR_database_v17_xls.zip

In [2]:
!unzip E-PRTR_database_v17_xls.zip 'Pollutant releases.xlsx'

Archive:  E-PRTR_database_v17_xls.zip
  inflating: Pollutant releases.xlsx  


In [3]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for 'retina' (high-resolution) figure output.
%config InlineBackend.figure_format = 'retina'

import pandas
import geopandas
import geoplot

In [4]:
%%time

pollutant_releases = pandas.read_excel('Pollutant releases.xlsx')[[
    'FacilityID', 'Lat', 'Long', 'ReportingYear',
    'PollutantName', 'PollutantGroupName', 'TotalQuantity',
]]
for column_name in pollutant_releases.select_dtypes('object'):
    pollutant_releases[column_name] = pollutant_releases[column_name].astype('category')
pollutant_releases.to_feather('pollutant_releases.feather')
print(pollutant_releases.info())
del pollutant_releases;

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 652351 entries, 0 to 652350
Data columns (total 7 columns):
FacilityID            652351 non-null int64
Lat                   652351 non-null float64
Long                  652351 non-null float64
ReportingYear         652351 non-null int64
PollutantName         652351 non-null category
PollutantGroupName    652351 non-null category
TotalQuantity         652351 non-null float64
dtypes: category(2), float64(3), int64(2)
memory usage: 26.1 MB
None
CPU times: user 3min, sys: 900 ms, total: 3min 1s
Wall time: 3min 1s


In [None]:
# Reload the cached table and attach one point geometry per row, built from its
# longitude (x) and latitude (y). Pattern taken from
# http://geopandas.org/gallery/create_geopandas_from_pandas.html
pollutant_releases = pandas.read_feather('pollutant_releases.feather')
pollutant_releases_geo = geopandas.GeoDataFrame(
    pollutant_releases,
    geometry=geopandas.points_from_xy(
        x=pollutant_releases.Long,
        y=pollutant_releases.Lat,
    ),
)
# The following cells work with pollutant_releases_geo only.
del pollutant_releases
pollutant_releases_geo.head()

In [6]:
# Base map: the 'naturalearth_lowres' dataset shipped with geopandas. Its
# geometries are used below both to clip the density plot and to draw outlines.
world = geopandas.read_file(
    geopandas.datasets.get_path('naturalearth_lowres'),
)

In [7]:
# One row per facility with a 2011 report located strictly inside the bounding
# box -28 < Long < 32, 32 < Lat < 75.
# groupby('FacilityID').first() collapses the multiple release rows of a facility
# into a single row indexed by FacilityID; the result is re-wrapped as a
# GeoDataFrame so it can be passed to the plotting functions.
european_facilities_2011 = geopandas.GeoDataFrame(
    pollutant_releases_geo
    .loc[lambda releases: (
        (releases.Long > -28)
        & (releases.Long < 32)
        & (releases.Lat > 32)
        & (releases.Lat < 75)
        & (releases.ReportingYear == 2011)
    )]
    .groupby('FacilityID')
    .first()
)

In [None]:
# Layer 1: kernel-density estimate of the 2011 facility locations, clipped to the
# base-map geometries. This call creates the axes the other layers draw on.
ax = geoplot.kdeplot(
    european_facilities_2011,
    projection=geoplot.crs.AlbersEqualArea(),
    figsize=(20, 12),
    clip=world.geometry,
    cmap='Reds',
    shade=True,
    # Not set: extent=(-28, 32, 32, 75), i.e. the same Long/Lat bounds that were
    # used to select the facilities.
)
# Layer 2: base-map outlines.
geoplot.polyplot(world.geometry, linewidth=1, ax=ax)
# Layer 3: the individual facilities as small orange dots.
geoplot.pointplot(european_facilities_2011, s=0.4, color='orange', ax=ax)
# Trailing semicolon keeps the Text repr out of the cell output.
ax.set_title('E-PRTR_database_v17_xls.zip / Pollutant releases.xlsx / 2011 / facilities');