__init__.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. """
  2. Python Library to Read FreeSurfer's cortical parcellation anatomical statistics
  3. ([lh]h.aparc(.*)?.stats)
  4. Freesurfer
  5. https://surfer.nmr.mgh.harvard.edu/
  6. >>> from freesurfer_stats import CorticalParcellationStats
  7. >>> stats = CorticalParcellationStats.read('tests/subjects/fabian/stats/lh.aparc.DKTatlas.stats')
  8. >>> stats.headers['CreationTime'].isoformat()
  9. '2019-05-09T21:05:54+00:00'
  10. >>> stats.headers['cvs_version']
  11. 'Id: mris_anatomical_stats.c,v 1.79 2016/03/14 15:15:34 greve Exp'
  12. >>> stats.headers['cmdline'][:64]
  13. 'mris_anatomical_stats -th3 -mgz -cortex ../label/lh.cortex.label'
  14. >>> stats.hemisphere
  15. >>> stats.whole_brain_measurements['estimated_total_intracranial_volume_mm^3']
  16. 1670487.274486
  17. >>> stats.whole_brain_measurements['white_surface_total_area_mm^2']
  18. 98553.0
  19. >>> stats.structure_measurements[['Structure Name', 'Surface Area (mm^2)', 'Gray Matter Volume (mm^3)']].head()
  20. Structure Name Surface Area (mm^2) Gray Matter Volume (mm^3)
  21. 0 caudalanteriorcingulate 1472 4258
  22. 1 caudalmiddlefrontal 3039 8239
  23. 2 cuneus 2597 6722
  24. 3 entorhinal 499 2379
  25. 4 fusiform 3079 9064
  26. """
  27. import datetime
  28. import re
  29. import typing
  30. import pandas
  31. from freesurfer_stats.version import __version__
  32. class CorticalParcellationStats:
  33. _HEMISPHERE_PREFIX_TO_SIDE = {'lh': 'left', 'rh': 'right'}
  34. _GENERAL_MEASUREMENTS_REGEX = re.compile(
  35. r'^Measure \S+, ([^,\s]+),? ([^,]+), ([\d\.]+), (\S+)$')
  36. _COLUMN_NAMES_NON_SAFE_REGEX = re.compile(r'\s+')
  37. def __init__(self):
  38. self.headers \
  39. = {} # type: typing.Dict[str, typing.Union[str, datetime.datetime]]
  40. self.whole_brain_measurements \
  41. = {} # type: typing.Dict[str, typing.Tuple[float, int]]
  42. self.structure_measurements \
  43. = {} # type: typing.Union[pandas.DataFrame, None]
  44. @property
  45. def hemisphere(self) -> str:
  46. return self._HEMISPHERE_PREFIX_TO_SIDE[self.headers['hemi']]
  47. @staticmethod
  48. def _read_header_line(stream: typing.TextIO) -> str:
  49. line = stream.readline()
  50. assert line.startswith('# ')
  51. return line[2:].rstrip()
  52. @classmethod
  53. def _read_column_header_line(cls, stream: typing.TextIO) -> typing.Tuple[int, str, str]:
  54. line = cls._read_header_line(stream)
  55. assert line.startswith('TableCol'), line
  56. line = line[len('TableCol '):].lstrip()
  57. index, key, value = line.split(maxsplit=2)
  58. return int(index), key, value
  59. def _read_headers(self, stream: typing.TextIO) -> None:
  60. self.headers = {}
  61. while True:
  62. line = self._read_header_line(stream)
  63. if line.startswith('Measure'):
  64. break
  65. elif line:
  66. attr_name, attr_value = line.split(' ', maxsplit=1)
  67. attr_value = attr_value.lstrip()
  68. if attr_name in ['cvs_version', 'mrisurf.c-cvs_version']:
  69. attr_value = attr_value.strip('$').rstrip()
  70. if attr_name == 'CreationTime':
  71. attr_dt = datetime.datetime.strptime(
  72. attr_value, '%Y/%m/%d-%H:%M:%S-%Z')
  73. if attr_dt.tzinfo is None:
  74. assert attr_value.endswith('-GMT')
  75. attr_dt = attr_dt.replace(tzinfo=datetime.timezone.utc)
  76. attr_value = attr_dt
  77. if attr_name == 'AnnotationFileTimeStamp':
  78. attr_value = datetime.datetime.strptime(
  79. attr_value, '%Y/%m/%d %H:%M:%S')
  80. self.headers[attr_name] = attr_value
  81. @classmethod
  82. def _format_column_name(cls, name: str, unit: typing.Optional[str]) -> str:
  83. column_name = name.lower()
  84. if unit not in ['unitless', 'NA']:
  85. column_name += '_' + unit
  86. return cls._COLUMN_NAMES_NON_SAFE_REGEX.sub('_', column_name)
  87. @classmethod
  88. def _read_column_attributes(cls, num: int, stream: typing.TextIO) \
  89. -> typing.List[typing.Dict[str, str]]:
  90. columns = []
  91. for column_index in range(1, int(num) + 1):
  92. column_attrs = {}
  93. for _ in range(3):
  94. column_index_line, key, value \
  95. = cls._read_column_header_line(stream)
  96. assert column_index_line == column_index
  97. assert key not in column_attrs
  98. column_attrs[key] = value
  99. columns.append(column_attrs)
  100. return columns
  101. def _read(self, stream: typing.TextIO) -> None:
  102. assert stream.readline().rstrip() \
  103. == '# Table of FreeSurfer cortical parcellation anatomical statistics'
  104. assert stream.readline().rstrip() == '#'
  105. self._read_headers(stream)
  106. self.whole_brain_measurements = {}
  107. line = self._read_header_line(stream)
  108. while not line.startswith('NTableCols'):
  109. key, name, value, unit \
  110. = self._GENERAL_MEASUREMENTS_REGEX.match(line).groups()
  111. if key == 'SupraTentorialVolNotVent' and name.lower() == 'supratentorial volume':
  112. name += ' Without Ventricles'
  113. column_name = self._format_column_name(name, unit)
  114. assert column_name not in self.whole_brain_measurements, \
  115. (key, name, column_name, self.whole_brain_measurements)
  116. self.whole_brain_measurements[column_name] = float(value)
  117. line = self._read_header_line(stream)
  118. columns = self._read_column_attributes(
  119. int(line[len('NTableCols '):]), stream)
  120. assert self._read_header_line(stream) \
  121. == 'ColHeaders ' + ' '.join(c['ColHeader'] for c in columns)
  122. self.structure_measurements = pandas.DataFrame(
  123. (line.rstrip().split() for line in stream),
  124. columns=[self._format_column_name(c['FieldName'], c['Units']) for c in columns]) \
  125. .apply(pandas.to_numeric, errors='ignore')
  126. @classmethod
  127. def read(cls, path: str) -> 'CorticalParcellationStats':
  128. stats = cls()
  129. with open(path, 'r') as stream:
  130. # pylint: disable=protected-access
  131. stats._read(stream)
  132. return stats