__init__.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. """
  2. Generate BibTeX Entries for PubMed Publications
  3. This module utilizes the API of TeXMed,
  4. a BibTeX interface for PubMed.
  5. TeXMed was written by Arne Muller
  6. https://www.bioinformatics.org/texmed/
  7. Command Line Example:
  8. $ pubmed-bibtex 31025164
  9. @Article{pmid31025164,
  10. Author="...",
  11. Title="...",
  12. Journal="...",
  13. ...
  14. }
  15. Python Example:
  16. >>> from pubmed_bibtex import bibtex_entry_from_pmid
  17. >>> print(bibtex_entry_from_pmid('31025164'))
  18. """
import html.parser
import re
from typing import Optional, Union

import requests
  22. _TEXMED_URL_PATTERN = 'https://www.bioinformatics.org/texmed/cgi-bin' \
  23. '/list.cgi?PMID={pmid}&linkOut'
  24. class _TeXMedHtmlParser(html.parser.HTMLParser):
  25. def __init__(self):
  26. self.bibtex_entry = None
  27. super().__init__()
  28. @staticmethod
  29. def _strip_bibtex_entry(data: str) -> str:
  30. return re.sub(r'\n\% \d+\s?\n', '', data).strip() + '\n'
  31. def handle_data(self, data: str) -> None:
  32. if 'Author' in data:
  33. self.bibtex_entry = self._strip_bibtex_entry(data)
  34. def error(self, message) -> None:
  35. raise Exception(message)
  36. def bibtex_entry_from_pmid(pmid: str) -> str:
  37. assert pmid.isdigit(), pmid
  38. resp = requests.get(_TEXMED_URL_PATTERN.format(pmid=pmid))
  39. resp.raise_for_status()
  40. parser = _TeXMedHtmlParser()
  41. parser.feed(resp.text)
  42. return parser.bibtex_entry