__init__.py 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. """
  2. Generate BibTeX Entries for PubMed Publications
  3. This module utilizes the API of TeXMed,
  4. a BibTeX interface for PubMed.
  5. TeXMed was written by Arne Muller
  6. https://www.bioinformatics.org/texmed/
  7. """
import html.parser
import re
from typing import Optional

import requests
  11. _TEXMED_URL_PATTERN = 'https://www.bioinformatics.org/texmed/cgi-bin' \
  12. '/list.cgi?PMID={pmid}&linkOut'
  13. class _TeXMedHtmlParser(html.parser.HTMLParser):
  14. def __init__(self):
  15. self.bibtex_entry = None
  16. super().__init__()
  17. @staticmethod
  18. def _strip_bibtex_entry(data: str) -> str:
  19. return re.sub(r'\n\% \d+\s?\n', '', data).strip() + '\n'
  20. def handle_data(self, data: str) -> None:
  21. if 'Author' in data:
  22. self.bibtex_entry = self._strip_bibtex_entry(data)
  23. def error(self, message) -> None:
  24. raise Exception(message)
  25. def bibtex_entry_from_pmid(pmid: str) -> str:
  26. assert pmid.isdigit(), pmid
  27. resp = requests.get(_TEXMED_URL_PATTERN.format(pmid=pmid))
  28. resp.raise_for_status()
  29. parser = _TeXMedHtmlParser()
  30. parser.feed(resp.text)
  31. return parser.bibtex_entry