__init__.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. """
  2. Python Library to Read FreeSurfer's Cortical Parcellation Anatomical Statistics
  3. ([lr]h.aparc(.*)?.stats)
  4. Freesurfer
  5. https://surfer.nmr.mgh.harvard.edu/
  6. >>> from freesurfer_stats import CorticalParcellationStats
  7. >>> stats = CorticalParcellationStats.read('tests/subjects/fabian/stats/lh.aparc.DKTatlas.stats')
  8. >>> stats.headers['CreationTime'].isoformat()
  9. '2019-05-09T21:05:54+00:00'
  10. >>> stats.headers['cvs_version']
  11. 'Id: mris_anatomical_stats.c,v 1.79 2016/03/14 15:15:34 greve Exp'
  12. >>> stats.headers['cmdline'][:64]
  13. 'mris_anatomical_stats -th3 -mgz -cortex ../label/lh.cortex.label'
  14. >>> stats.hemisphere
      'left'
  15. >>> stats.whole_brain_measurements['estimated_total_intracranial_volume_mm^3']
  16. 0 1.670487e+06
  17. Name: estimated_total_intracranial_volume_mm^3, dtype: float64
  18. >>> stats.whole_brain_measurements['white_surface_total_area_mm^2']
  19. 0 98553
  20. Name: white_surface_total_area_mm^2, dtype: int64
  21. >>> stats.structural_measurements[['structure_name', 'surface_area_mm^2',
  22. ... 'gray_matter_volume_mm^3']].head()
  23. structure_name surface_area_mm^2 gray_matter_volume_mm^3
  24. 0 caudalanteriorcingulate 1472 4258
  25. 1 caudalmiddlefrontal 3039 8239
  26. 2 cuneus 2597 6722
  27. 3 entorhinal 499 2379
  28. 4 fusiform 3079 9064
  29. Copyright (C) 2019 Fabian Peter Hammerle <fabian@hammerle.me>
  30. This program is free software: you can redistribute it and/or modify
  31. it under the terms of the GNU General Public License as published by
  32. the Free Software Foundation, either version 3 of the License, or
  33. any later version.
  34. This program is distributed in the hope that it will be useful,
  35. but WITHOUT ANY WARRANTY; without even the implied warranty of
  36. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  37. GNU General Public License for more details.
  38. You should have received a copy of the GNU General Public License
  39. along with this program. If not, see <https://www.gnu.org/licenses/>.
  40. """
  41. import datetime
  42. import re
  43. import typing
  44. import numpy
  45. import pandas
  46. from freesurfer_stats.version import __version__
  47. class CorticalParcellationStats:
  48. _HEMISPHERE_PREFIX_TO_SIDE = {'lh': 'left', 'rh': 'right'}
  49. _GENERAL_MEASUREMENTS_REGEX = re.compile(
  50. r'^Measure \S+, ([^,\s]+),? ([^,]+), ([\d\.]+), (\S+)$')
  51. _COLUMN_NAMES_NON_SAFE_REGEX = re.compile(r'\s+')
  52. def __init__(self):
  53. self.headers \
  54. = {} # type: typing.Dict[str, typing.Union[str, datetime.datetime]]
  55. self.whole_brain_measurements \
  56. = {} # type: typing.Dict[str, typing.Tuple[float, int]]
  57. self.structural_measurements \
  58. = {} # type: typing.Union[pandas.DataFrame, None]
  59. @property
  60. def hemisphere(self) -> str:
  61. return self._HEMISPHERE_PREFIX_TO_SIDE[self.headers['hemi']]
  62. @staticmethod
  63. def _read_header_line(stream: typing.TextIO) -> str:
  64. line = stream.readline()
  65. assert line.startswith('# ')
  66. return line[2:].rstrip()
  67. @classmethod
  68. def _read_column_header_line(cls, stream: typing.TextIO) -> typing.Tuple[int, str, str]:
  69. line = cls._read_header_line(stream)
  70. assert line.startswith('TableCol'), line
  71. line = line[len('TableCol '):].lstrip()
  72. index, key, value = line.split(maxsplit=2)
  73. return int(index), key, value
  74. def _read_headers(self, stream: typing.TextIO) -> None:
  75. self.headers = {}
  76. while True:
  77. line = self._read_header_line(stream)
  78. if line.startswith('Measure'):
  79. break
  80. if line:
  81. attr_name, attr_value = line.split(" ", maxsplit=1)
  82. attr_value = attr_value.lstrip()
  83. if attr_name in ['cvs_version', 'mrisurf.c-cvs_version']:
  84. attr_value = attr_value.strip('$').rstrip()
  85. if attr_name == 'CreationTime':
  86. attr_dt = datetime.datetime.strptime(
  87. attr_value, '%Y/%m/%d-%H:%M:%S-%Z')
  88. if attr_dt.tzinfo is None:
  89. assert attr_value.endswith('-GMT')
  90. attr_dt = attr_dt.replace(tzinfo=datetime.timezone.utc)
  91. attr_value = attr_dt
  92. if attr_name == 'AnnotationFileTimeStamp':
  93. attr_value = datetime.datetime.strptime(
  94. attr_value, '%Y/%m/%d %H:%M:%S')
  95. self.headers[attr_name] = attr_value
  96. @classmethod
  97. def _format_column_name(cls, name: str, unit: typing.Optional[str]) -> str:
  98. column_name = name.lower()
  99. if unit not in ["unitless", "NA"]:
  100. column_name += "_" + unit
  101. return cls._COLUMN_NAMES_NON_SAFE_REGEX.sub("_", column_name)
  102. @classmethod
  103. def _parse_whole_brain_measurements_line(
  104. cls, line: str,
  105. ) -> typing.Tuple[str, numpy.ndarray]:
  106. match = cls._GENERAL_MEASUREMENTS_REGEX.match(line)
  107. if not match:
  108. raise ValueError("unexpected line: {!r}".format(line))
  109. key, name, value, unit = match.groups()
  110. if (
  111. key == "SupraTentorialVolNotVent"
  112. and name.lower() == "supratentorial volume"
  113. ):
  114. name += " Without Ventricles"
  115. column_name = cls._format_column_name(name, unit)
  116. return column_name, pandas.to_numeric([value], errors="raise")
  117. @classmethod
  118. def _read_column_attributes(cls, num: int, stream: typing.TextIO) \
  119. -> typing.List[typing.Dict[str, str]]:
  120. columns = []
  121. for column_index in range(1, int(num) + 1):
  122. column_attrs = {}
  123. for _ in range(3):
  124. column_index_line, key, value \
  125. = cls._read_column_header_line(stream)
  126. assert column_index_line == column_index
  127. assert key not in column_attrs
  128. column_attrs[key] = value
  129. columns.append(column_attrs)
  130. return columns
  131. def _read(self, stream: typing.TextIO) -> None:
  132. assert stream.readline().rstrip() \
  133. == '# Table of FreeSurfer cortical parcellation anatomical statistics'
  134. assert stream.readline().rstrip() == '#'
  135. self._read_headers(stream)
  136. self.whole_brain_measurements = pandas.DataFrame()
  137. line = self._read_header_line(stream)
  138. while not line.startswith("NTableCols"):
  139. if line.startswith("BrainVolStatsFixed"):
  140. # https://surfer.nmr.mgh.harvard.edu/fswiki/BrainVolStatsFixed
  141. assert (
  142. line.startswith("BrainVolStatsFixed see ")
  143. or line == "BrainVolStatsFixed-NotNeeded because voxelvolume=1mm3"
  144. )
  145. self.headers["BrainVolStatsFixed"] = line[len("BrainVolStatsFixed-") :]
  146. else:
  147. column_name, value = self._parse_whole_brain_measurements_line(line)
  148. assert column_name not in self.whole_brain_measurements, column_name
  149. self.whole_brain_measurements[column_name] = value
  150. line = self._read_header_line(stream)
  151. columns = self._read_column_attributes(
  152. int(line[len('NTableCols '):]), stream)
  153. assert self._read_header_line(stream) \
  154. == 'ColHeaders ' + ' '.join(c['ColHeader'] for c in columns)
  155. self.structural_measurements = pandas.DataFrame(
  156. (line.rstrip().split() for line in stream),
  157. columns=[self._format_column_name(c['FieldName'], c['Units']) for c in columns]) \
  158. .apply(pandas.to_numeric, errors='ignore')
  159. @classmethod
  160. def read(cls, path: str) -> 'CorticalParcellationStats':
  161. stats = cls()
  162. with open(path, 'r') as stream:
  163. # pylint: disable=protected-access
  164. stats._read(stream)
  165. return stats