__init__.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
"""
Generate BibTeX Entries for PubMed Publications

This module utilizes the API of TeXMed,
a BibTeX interface for PubMed.
TeXMed was written by Arne Muller
https://www.bioinformatics.org/texmed/

Command Line Example:

    $ pubmed-bibtex 31025164
    @Article{pmid31025164,
       Author="...",
       Title="...",
       Journal="...",
       ...
    }

Python Example:

    >>> from pubmed_bibtex import bibtex_entry_from_pmid
    >>> print(bibtex_entry_from_pmid('31025164'))
"""
import html.parser
import re
from typing import Optional, Union

import requests

from pubmed_bibtex.version import __version__
  23. _TEXMED_URL_PATTERN = 'https://www.bioinformatics.org/texmed/cgi-bin' \
  24. '/list.cgi?PMID={pmid}&linkOut'
  25. class _TeXMedHtmlParser(html.parser.HTMLParser):
  26. def __init__(self):
  27. self.bibtex_entry = None
  28. super().__init__()
  29. @staticmethod
  30. def _strip_bibtex_entry(data: str) -> str:
  31. return re.sub(r'\n\% \d+\s?\n', '', data).strip() + '\n'
  32. def handle_data(self, data: str) -> None:
  33. if 'Author' in data:
  34. self.bibtex_entry = self._strip_bibtex_entry(data)
  35. def error(self, message) -> None:
  36. raise Exception(message) # pragma: no cover
  37. def bibtex_entry_from_pmid(pmid: str) -> str:
  38. assert pmid.isdigit(), pmid
  39. resp = requests.get(_TEXMED_URL_PATTERN.format(pmid=pmid))
  40. resp.raise_for_status()
  41. parser = _TeXMedHtmlParser()
  42. parser.feed(resp.text)
  43. return parser.bibtex_entry