# __init__.py (3.9 KB)
import argparse
import os
import re
import sys
import typing

import pandas
  6. # https://surfer.nmr.mgh.harvard.edu/fswiki/HippocampalSubfields
  7. HIPPOCAMPAL_VOLUME_FILENAME_PATTERN = r'^(?P<h>[lr])h\.hippoSfVolumes' \
  8. r'(?P<T1>-T1)?(-(?P<analysis_id>.+?))?\.v10.txt$'
  9. HIPPOCAMPAL_VOLUME_FILENAME_REGEX = re.compile(HIPPOCAMPAL_VOLUME_FILENAME_PATTERN)
  10. DEFAULT_HIPPOCAMPAL_VOLUME_FIND_FILENAME_PATTERN = re.sub(r'\?P<.+?>', '',
  11. HIPPOCAMPAL_VOLUME_FILENAME_PATTERN)
  12. VOLUME_FILENAME_HEMISPHERE_MAP = {'l': 'left', 'r': 'right'}
  13. def find_hippocampal_volume_files(root_dir_path: str,
  14. filename_regex: typing.Pattern = HIPPOCAMPAL_VOLUME_FILENAME_REGEX
  15. ) -> typing.Iterator[str]:
  16. for dirpath, _, filenames in os.walk(root_dir_path):
  17. for filename in filter(filename_regex.search, filenames):
  18. yield os.path.join(dirpath, filename)
  19. def read_hippocampal_volumes(volume_file_path: str) -> dict:
  20. subfield_volumes = {}
  21. with open(volume_file_path, 'r') as volume_file:
  22. for line in volume_file.read().rstrip().split('\n'):
  23. subfield_name, subfield_volume_str = line.split(' ')
  24. subfield_volumes[subfield_name] = float(subfield_volume_str)
  25. return subfield_volumes
  26. def parse_hippocampal_volume_file_path(volume_file_path: str) -> dict:
  27. subject_dir_path = os.path.dirname(os.path.dirname(os.path.abspath(volume_file_path)))
  28. filename_match = HIPPOCAMPAL_VOLUME_FILENAME_REGEX.match(os.path.basename(volume_file_path))
  29. assert filename_match, volume_file_path
  30. filename_groups = filename_match.groupdict()
  31. assert filename_groups['T1'] or filename_groups['analysis_id'], volume_file_path
  32. return {
  33. 'subject': os.path.basename(subject_dir_path),
  34. 'hemisphere': VOLUME_FILENAME_HEMISPHERE_MAP[filename_groups['h']],
  35. 'T1_input': filename_groups['T1'] is not None,
  36. 'analysis_id': filename_groups['analysis_id'],
  37. }
  38. def read_hippocampal_volume_file_dataframe(volume_file_path: str) -> pandas.DataFrame:
  39. volumes_frame = pandas.DataFrame(
  40. read_hippocampal_volumes(volume_file_path).items(),
  41. columns=['subfield', 'volume'])
  42. for key, value in parse_hippocampal_volume_file_path(volume_file_path).items():
  43. volumes_frame[key] = value
  44. # volumes_frame['hemisphere'] = volumes_frame['hemisphere'].astype('category')
  45. return volumes_frame
  46. def main():
  47. argparser = argparse.ArgumentParser(
  48. description='Read hippocampal subfield volumes computed by Freesurfer'
  49. '\nhttps://surfer.nmr.mgh.harvard.edu/fswiki/HippocampalSubfields')
  50. argparser.add_argument('--filename-regex', dest='filename_pattern',
  51. default=DEFAULT_HIPPOCAMPAL_VOLUME_FIND_FILENAME_PATTERN,
  52. help='default: %(default)s')
  53. argparser.add_argument('--output-format', choices=['csv'], default='csv',
  54. help='default: %(default)s')
  55. subjects_dir_path = os.environ.get('SUBJECTS_DIR', None)
  56. argparser.add_argument('root_dir_path',
  57. nargs='?' if subjects_dir_path else 1,
  58. default=[subjects_dir_path],
  59. help='default: $SUBJECTS_DIR ({})'.format(subjects_dir_path))
  60. args = argparser.parse_args()
  61. volume_file_paths = find_hippocampal_volume_files(
  62. root_dir_path=args.root_dir_path[0],
  63. filename_regex=re.compile(args.filename_pattern))
  64. volume_frames = []
  65. for volume_file_path in volume_file_paths:
  66. volume_frame = read_hippocampal_volume_file_dataframe(volume_file_path)
  67. volume_frame['source_path'] = os.path.abspath(volume_file_path)
  68. volume_frames.append(volume_frame)
  69. united_volume_frame = pandas.concat(volume_frames, ignore_index=True)
  70. print(united_volume_frame.to_csv(index=False))
  71. if __name__ == '__main__':
  72. main()