__init__.py

  1. """
  2. Read hippocampal subfield volumes computed by Freesurfer
  3. https://surfer.nmr.mgh.harvard.edu/fswiki/HippocampalSubfields
  4. """
  5. import argparse
  6. import os
  7. import re
  8. import typing
  9. import pandas
  10. from freesurfer_volume_reader.freesurfer import FreesurferHippocampalVolumeFile
  11. VOLUME_FILENAME_HEMISPHERE_MAP = {'l': 'left', 'r': 'right'}
def remove_group_names_from_regex(regex_pattern: str) -> str:
    return re.sub(r'\?P<.+?>', '', regex_pattern)

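# Illustrative transformation (the input pattern below is hypothetical):
#   remove_group_names_from_regex(r'(?P<h>[lr])h\.hippoSfVolumes')
#   == r'([lr])h\.hippoSfVolumes'
# main() below uses this to build the default value of --filename-regex.
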
def read_hippocampal_volumes_mm3(volume_file_path: str) -> dict:
    subfield_volumes = {}
    with open(volume_file_path, 'r') as volume_file:
        for line in volume_file.read().rstrip().split('\n'):
            # https://github.com/freesurfer/freesurfer/blob/release_6_0_0/HippoSF/src/segmentSubjectT1T2_autoEstimateAlveusML.m#L8
            # https://github.com/freesurfer/freesurfer/blob/release_6_0_0/HippoSF/src/segmentSubjectT1T2_autoEstimateAlveusML.m#L1946
            subfield_name, subfield_volume_mm3_str = line.split(' ')
            subfield_volumes[subfield_name] = float(subfield_volume_mm3_str)
    return subfield_volumes

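# Illustrative input (hypothetical contents of a hippocampal subfield volume
# file; each line holds "<subfield-name> <volume>" separated by a single space):
#   Hippocampal_tail 123.456789
#   subiculum 456.789012
# read_hippocampal_volumes_mm3 would return this as
#   {'Hippocampal_tail': 123.456789, 'subiculum': 456.789012}
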
def parse_hippocampal_volume_file_path(volume_file_path: str) -> dict:
    subject_dir_path = os.path.dirname(os.path.dirname(os.path.abspath(volume_file_path)))
    filename_match = FreesurferHippocampalVolumeFile.FILENAME_REGEX.match(
        os.path.basename(volume_file_path))
    assert filename_match, volume_file_path
    filename_groups = filename_match.groupdict()
    assert filename_groups['T1'] or filename_groups['analysis_id'], volume_file_path
    return {
        'subject': os.path.basename(subject_dir_path),
        'hemisphere': VOLUME_FILENAME_HEMISPHERE_MAP[filename_groups['h']],
        'T1_input': filename_groups['T1'] is not None,
        'analysis_id': filename_groups['analysis_id'],
    }

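# Illustrative example (the path is hypothetical, and it assumes FILENAME_REGEX
# matches FreeSurfer 6.0's hippocampal subfield output naming): a file at
# <subjects_dir>/bert/mri/lh.hippoSfVolumes-T1.v10.txt would be parsed as
#   {'subject': 'bert', 'hemisphere': 'left', 'T1_input': True, 'analysis_id': None}
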
def read_hippocampal_volume_file_dataframe(volume_file_path: str) -> pandas.DataFrame:
    volumes_frame = pandas.DataFrame([
        {'subfield': s, 'volume_mm^3': v}
        for s, v in read_hippocampal_volumes_mm3(volume_file_path).items()
    ])
    for key, value in parse_hippocampal_volume_file_path(volume_file_path).items():
        volumes_frame[key] = value
    # volumes_frame['hemisphere'] = volumes_frame['hemisphere'].astype('category')
    return volumes_frame

def main():
    argparser = argparse.ArgumentParser(description=__doc__)
    argparser.add_argument('--filename-regex', type=re.compile,
                           default=remove_group_names_from_regex(
                               FreesurferHippocampalVolumeFile.FILENAME_PATTERN),
                           help='default: %(default)s')
    argparser.add_argument('--output-format', choices=['csv'], default='csv',
                           help='default: %(default)s')
    subjects_dir_path = os.environ.get('SUBJECTS_DIR', None)
    argparser.add_argument('root_dir_paths',
                           metavar='ROOT_DIR',
                           nargs='*' if subjects_dir_path else '+',
                           default=[subjects_dir_path],
                           help='default: $SUBJECTS_DIR ({})'.format(subjects_dir_path))
    args = argparser.parse_args()
    volume_file_paths = [p for d in args.root_dir_paths
                         for p in FreesurferHippocampalVolumeFile.find(
                             root_dir_path=d, filename_regex=args.filename_regex)]
    volume_frames = []
    for volume_file_path in volume_file_paths:
        volume_frame = read_hippocampal_volume_file_dataframe(volume_file_path)
        volume_frame['source_path'] = os.path.abspath(volume_file_path)
        volume_frames.append(volume_frame)
    united_volume_frame = pandas.concat(volume_frames, ignore_index=True)
    print(united_volume_frame.to_csv(index=False))

if __name__ == '__main__':
    main()
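
For reference, a minimal sketch of how the functions above might be used from Python instead of via the command-line entry point. The subjects directory path is hypothetical; the calls mirror what main() does.

import os
import re

import pandas

from freesurfer_volume_reader import (
    read_hippocampal_volume_file_dataframe,
    remove_group_names_from_regex,
)
from freesurfer_volume_reader.freesurfer import FreesurferHippocampalVolumeFile

# hypothetical FreeSurfer subjects directory
subjects_dir_path = os.environ.get('SUBJECTS_DIR', '/data/freesurfer-subjects')
# same default pattern that main() passes to --filename-regex
filename_regex = re.compile(remove_group_names_from_regex(
    FreesurferHippocampalVolumeFile.FILENAME_PATTERN))
volume_frames = []
for volume_file_path in FreesurferHippocampalVolumeFile.find(
        root_dir_path=subjects_dir_path, filename_regex=filename_regex):
    volume_frame = read_hippocampal_volume_file_dataframe(volume_file_path)
    volume_frame['source_path'] = os.path.abspath(volume_file_path)
    volume_frames.append(volume_frame)
print(pandas.concat(volume_frames, ignore_index=True).to_csv(index=False))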