2 Commits d1ca680da5 ... 096360a2b4

Author SHA1 Message Date
  Fabian Peter Hammerle 096360a2b4 object -> category 4 years ago
  Fabian Peter Hammerle f53bc09b77 strip non-relevant columns 4 years ago
1 changed files with 37 additions and 2 deletions
  1. 37 2
      pollution.ipynb

+ 37 - 2
pollution.ipynb

@@ -41,13 +41,48 @@
     "\n",
     "# http://geopandas.org/gallery/create_geopandas_from_pandas.html\n",
     "pollutant_releases_geo = geopandas.GeoDataFrame(\n",
-    "    pollutant_releases,\n",
+    "    pollutant_releases[[\n",
+    "        'FacilityID', 'Lat', 'Long', 'ReportingYear', 'PollutantName', \n",
+    "        'PollutantGroupName', 'TotalQuantity', 'TotalQuantity',\n",
+    "    ]],\n",
     "    geometry=geopandas.points_from_xy(\n",
     "        pollutant_releases['Long'],\n",
     "        pollutant_releases['Lat'],\n",
     "    ),\n",
     ")\n",
-    "pollutant_releases_geo.geometry.head()"
+    "for column_name in pollutant_releases_geo.select_dtypes('object'):\n",
+    "    pollutant_releases_geo[column_name] = pollutant_releases_geo[column_name].astype('category')\n",
+    "pollutant_releases_geo.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'geopandas.geodataframe.GeoDataFrame'>\n",
+      "RangeIndex: 652351 entries, 0 to 652350\n",
+      "Data columns (total 9 columns):\n",
+      "FacilityID            652351 non-null int64\n",
+      "Lat                   652351 non-null float64\n",
+      "Long                  652351 non-null float64\n",
+      "ReportingYear         652351 non-null int64\n",
+      "PollutantName         652351 non-null category\n",
+      "PollutantGroupName    652351 non-null category\n",
+      "TotalQuantity         652351 non-null float64\n",
+      "TotalQuantity         652351 non-null float64\n",
+      "geometry              652351 non-null geometry\n",
+      "dtypes: category(2), float64(4), geometry(1), int64(2)\n",
+      "memory usage: 36.1 MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "pollutant_releases_geo.info()"
    ]
   },
   {