|
@@ -12,61 +12,48 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 2,
|
|
|
"metadata": {},
|
|
|
- "outputs": [],
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "Archive: E-PRTR_database_v17_xls.zip\n",
|
|
|
+ " inflating: Pollutant releases.xlsx \n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
"source": [
|
|
|
"!unzip E-PRTR_database_v17_xls.zip 'Pollutant releases.xlsx'"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 3,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "import pandas\n",
|
|
|
+ "%matplotlib inline\n",
|
|
|
+ "%config InlineBackend.figure_format = 'retina'\n",
|
|
|
"\n",
|
|
|
- "pollutant_releases = pandas.read_excel('Pollutant releases.xlsx')\n",
|
|
|
- "pollutant_releases.head()"
|
|
|
- ]
|
|
|
- },
|
|
|
- {
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
- "metadata": {},
|
|
|
- "outputs": [],
|
|
|
- "source": [
|
|
|
+ "import pandas\n",
|
|
|
"import geopandas\n",
|
|
|
- "\n",
|
|
|
- "# http://geopandas.org/gallery/create_geopandas_from_pandas.html\n",
|
|
|
- "pollutant_releases_geo = geopandas.GeoDataFrame(\n",
|
|
|
- " pollutant_releases[[\n",
|
|
|
- " 'FacilityID', 'Lat', 'Long', 'ReportingYear', 'PollutantName', \n",
|
|
|
- " 'PollutantGroupName', 'TotalQuantity', 'TotalQuantity',\n",
|
|
|
- " ]],\n",
|
|
|
- " geometry=geopandas.points_from_xy(\n",
|
|
|
- " pollutant_releases['Long'],\n",
|
|
|
- " pollutant_releases['Lat'],\n",
|
|
|
- " ),\n",
|
|
|
- ")\n",
|
|
|
- "for column_name in pollutant_releases_geo.select_dtypes('object'):\n",
|
|
|
- " pollutant_releases_geo[column_name] = pollutant_releases_geo[column_name].astype('category')\n",
|
|
|
- "pollutant_releases_geo.head()"
|
|
|
+ "import geoplot"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 5,
|
|
|
+ "execution_count": 4,
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
"name": "stdout",
|
|
|
"output_type": "stream",
|
|
|
"text": [
|
|
|
- "<class 'geopandas.geodataframe.GeoDataFrame'>\n",
|
|
|
+ "<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
"RangeIndex: 652351 entries, 0 to 652350\n",
|
|
|
- "Data columns (total 9 columns):\n",
|
|
|
+ "Data columns (total 7 columns):\n",
|
|
|
"FacilityID 652351 non-null int64\n",
|
|
|
"Lat 652351 non-null float64\n",
|
|
|
"Long 652351 non-null float64\n",
|
|
@@ -74,15 +61,26 @@
|
|
|
"PollutantName 652351 non-null category\n",
|
|
|
"PollutantGroupName 652351 non-null category\n",
|
|
|
"TotalQuantity 652351 non-null float64\n",
|
|
|
- "TotalQuantity 652351 non-null float64\n",
|
|
|
- "geometry 652351 non-null geometry\n",
|
|
|
- "dtypes: category(2), float64(4), geometry(1), int64(2)\n",
|
|
|
- "memory usage: 36.1 MB\n"
|
|
|
+ "dtypes: category(2), float64(3), int64(2)\n",
|
|
|
+ "memory usage: 26.1 MB\n",
|
|
|
+ "None\n",
|
|
|
+ "CPU times: user 3min, sys: 900 ms, total: 3min 1s\n",
|
|
|
+ "Wall time: 3min 1s\n"
|
|
|
]
|
|
|
}
|
|
|
],
|
|
|
"source": [
|
|
|
- "pollutant_releases_geo.info()"
|
|
|
+ "%%time\n",
|
|
|
+ "# Excel parsing is slow (minutes): keep needed columns, categorize strings, cache as feather.\n",
|
|
|
+ "pollutant_releases = pandas.read_excel('Pollutant releases.xlsx')[[\n",
|
|
|
+ " 'FacilityID', 'Lat', 'Long', 'ReportingYear',\n",
|
|
|
+ " 'PollutantName', 'PollutantGroupName', 'TotalQuantity',\n",
|
|
|
+ "]]\n",
|
|
|
+ "for column_name in pollutant_releases.select_dtypes('object'):\n",
|
|
|
+ " pollutant_releases[column_name] = pollutant_releases[column_name].astype('category')\n",
|
|
|
+ "pollutant_releases.to_feather('pollutant_releases.feather')\n",
|
|
|
+ "pollutant_releases.info()\n",
|
|
|
+ "del pollutant_releases"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -91,21 +89,31 @@
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))"
|
|
|
+ "# http://geopandas.org/gallery/create_geopandas_from_pandas.html\n",
|
|
|
+ "pollutant_releases = pandas.read_feather('pollutant_releases.feather')\n",
|
|
|
+ "pollutant_releases_geo = geopandas.GeoDataFrame(\n",
|
|
|
+ " pollutant_releases,\n",
|
|
|
+ " geometry=geopandas.points_from_xy(\n",
|
|
|
+ " pollutant_releases['Long'],\n",
|
|
|
+ " pollutant_releases['Lat'],\n",
|
|
|
+ " ),\n",
|
|
|
+ ")\n",
|
|
|
+ "del pollutant_releases\n",
|
|
|
+ "pollutant_releases_geo.head()"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 6,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "%config InlineBackend.figure_format = 'retina'"
|
|
|
+ "world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 7,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -124,8 +132,6 @@
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "import geoplot\n",
|
|
|
- "\n",
|
|
|
"ax = geoplot.kdeplot(\n",
|
|
|
" european_facilities_2011,\n",
|
|
|
" clip=world.geometry,\n",
|