Browse Source

CorticalParcellationStats.read: support pathlib.Path, http, s3 etc. by utilizing pandas.io.common.get_filepath_or_buffer

https://github.com/fphammerle/freesurfer-stats/issues/6
https://github.com/fphammerle/freesurfer-stats/pull/7/files
Fabian Peter Hammerle 3 years ago
parent
commit
81972477c8
4 changed files with 72 additions and 3 deletions
  1. 4 0
      CHANGELOG.md
  2. 17 0
      README.rst
  3. 28 3
      freesurfer_stats/__init__.py
  4. 23 0
      tests/test_cortical_parcellation_stats.py

+ 4 - 0
CHANGELOG.md

@@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
+### Added
+- `CorticalParcellationStats.read` supports `pathlib.Path`, `"http://…"`, `"https://…"`, `"s3://…"` etc.
+  via `pandas.io.common.get_filepath_or_buffer`
+  (https://github.com/fphammerle/freesurfer-stats/issues/6)
 
 ## [1.1.1] - 2020-05-07
 ### Fixed

+ 17 - 0
README.rst

@@ -88,6 +88,23 @@ Load Multiple Stats Files
     6  fabian      rh.aparc.pial.stats      right                            NaN                      121260.0
     7  fabian    lh.aparc.a2009s.stats       left                        98553.0                           NaN
 
+Load Stats File From Webserver, Amazon S3 or Google Cloud Storage
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+    >>> from freesurfer_stats import CorticalParcellationStats
+    >>> stats = CorticalParcellationStats.read('https://[...]/stats/rh.aparc.stats')
+    >>> stats.whole_brain_measurements['total_cortical_gray_matter_volume_mm^3']
+    0    553998.311189
+    Name: total_cortical_gray_matter_volume_mm^3, dtype: float64
+
+To load from Amazon S3 or Google Cloud Storage, replace ``https://`` with ``s3://`` or ``gcs://`` in the URL.
+
+Credentials for S3 may be provided in ``~/.aws/credentials``
+or via environment variables.
+See `S3Fs docs <https://s3fs.readthedocs.io/en/latest/#credentials>`__.
+
 Tests
 -----
 

+ 28 - 3
freesurfer_stats/__init__.py

@@ -47,6 +47,8 @@ along with this program.  If not, see <https://www.gnu.org/licenses/>.
 """
 
 import datetime
+import io
+import pathlib
 import re
 import typing
 
@@ -180,9 +182,32 @@ class CorticalParcellationStats:
             .apply(pandas.to_numeric, errors='ignore')
 
     @classmethod
-    def read(cls, path: str) -> 'CorticalParcellationStats':
+    def read(cls, path: typing.Union[str, pathlib.Path]) -> "CorticalParcellationStats":
+        # path_or_buffer: typing.Union[str, pathlib.Path, typing.IO[typing.AnyStr],
+        #                              s3fs.S3File, gcsfs.GCSFile]
+        # https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/io/parsers.py#L436
+        # https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/_typing.py#L30
+        (
+            path_or_buffer,
+            _,
+            _,
+            *instructions,
+        ) = pandas.io.common.get_filepath_or_buffer(path)
+        # https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/io/common.py#L171
+        # https://github.com/pandas-dev/pandas/blob/v0.21.0/pandas/io/common.py#L171
+        if instructions:  # pragma: no cover
+            assert len(instructions) == 1, instructions
+            should_close = instructions[0]
+        else:  # pragma: no cover
+            should_close = hasattr(path_or_buffer, "close")
         stats = cls()
-        with open(path, 'r') as stream:
+        if hasattr(path_or_buffer, "readline"):
             # pylint: disable=protected-access
-            stats._read(stream)
+            stats._read(io.TextIOWrapper(path_or_buffer))
+        else:
+            with open(path_or_buffer, "r") as stream:
+                # pylint: disable=protected-access
+                stats._read(stream)
+        if should_close:
+            path_or_buffer.close()
         return stats

+ 23 - 0
tests/test_cortical_parcellation_stats.py

@@ -17,6 +17,7 @@ along with this program.  If not, see <https://www.gnu.org/licenses/>.
 """
 import datetime
 import os
+import pathlib
 
 import numpy
 import pandas.util.testing
@@ -288,3 +289,25 @@ def test__parse_whole_brain_measurements_line_parse_error(line):
     # pylint: disable=protected-access
     with pytest.raises(ValueError):
         CorticalParcellationStats._parse_whole_brain_measurements_line(line)
+
+
+@pytest.mark.parametrize(
+    "path_str",
+    [os.path.join(SUBJECTS_DIR, "fabian", "stats", "lh.aparc.DKTatlas.stats.short"),],
+)
+def test_read_pathlib(path_str: str):
+    stats_str = CorticalParcellationStats.read(path_str)
+    stats_pathlib = CorticalParcellationStats.read(pathlib.Path(path_str))
+    assert stats_str.headers == stats_pathlib.headers
+
+
+@pytest.mark.parametrize(
+    "url",
+    [
+        "https://raw.githubusercontent.com/fphammerle/freesurfer-stats"
+        "/master/tests/subjects/fabian/stats/rh.aparc.stats"
+    ],
+)
+def test_read_https(url: str):
+    stats = CorticalParcellationStats.read(url)
+    assert stats.headers["generating_program"] == "mris_anatomical_stats"