# __init__.py
"""
Generate BibTeX Entries for PubMed Publications
This module utilizes the API of TeXMed,
a BibTeX interface for PubMed.
TeXMed was written by Arne Muller
https://www.bioinformatics.org/texmed/
"""
import argparse
import html.parser
import re
import typing

import requests
  12. _TEXMED_URL_PATTERN = 'https://www.bioinformatics.org/texmed/cgi-bin' \
  13. '/list.cgi?PMID={pmid}&linkOut'
  14. class _TeXMedHtmlParser(html.parser.HTMLParser):
  15. def __init__(self):
  16. self.bibtex_entry = None
  17. super().__init__()
  18. @staticmethod
  19. def _strip_bibtex_entry(data: str) -> str:
  20. return re.sub(r'\n\% \d+\s?\n', '', data).strip() + '\n'
  21. def handle_data(self, data: str) -> None:
  22. if 'Author' in data:
  23. self.bibtex_entry = self._strip_bibtex_entry(data)
  24. def error(self, message) -> None:
  25. raise Exception(message)
  26. def bibtex_entry_from_pmid(pmid: str) -> str:
  27. assert pmid.isdigit(), pmid
  28. resp = requests.get(_TEXMED_URL_PATTERN.format(pmid=pmid))
  29. resp.raise_for_status()
  30. parser = _TeXMedHtmlParser()
  31. parser.feed(resp.text)
  32. return parser.bibtex_entry
  33. def _main():
  34. argparser = argparse.ArgumentParser(
  35. description=__doc__.strip(),
  36. formatter_class=argparse.RawDescriptionHelpFormatter)
  37. argparser.add_argument('pmid')
  38. args = argparser.parse_args()
  39. print(bibtex_entry_from_pmid(pmid=args.pmid),
  40. end='')
  41. if __name__ == '__main__':
  42. _main()