__init__.py 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. """
  2. Generate BibTeX Entries for PubMed Publications
  3. This module utilizes the API of TeXMed,
  4. a BibTeX interface for PubMed.
  5. TeXMed was written by Arne Muller
  6. https://www.bioinformatics.org/texmed/
  7. Command Line Example:
  8. $ pubmed-bibtex 31025164
  9. @Article{pmid31025164,
  10. Author="...",
  11. Title="...",
  12. Journal="...",
  13. ...
  14. }
  15. Python Example:
  16. >>> from pubmed_bibtex import bibtex_entry_from_pmid
  17. >>> print(bibtex_entry_from_pmid("31025164"))
  18. Copyright (C) 2019 Fabian Peter Hammerle <fabian@hammerle.me>
  19. This program is free software: you can redistribute it and/or modify
  20. it under the terms of the GNU General Public License as published by
  21. the Free Software Foundation, either version 3 of the License, or
  22. (at your option) any later version.
  23. This program is distributed in the hope that it will be useful,
  24. but WITHOUT ANY WARRANTY; without even the implied warranty of
  25. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  26. GNU General Public License for more details.
  27. You should have received a copy of the GNU General Public License
  28. along with this program. If not, see <https://www.gnu.org/licenses/>.
  29. """
  30. import html.parser
  31. import logging
  32. import re
  33. import typing
  34. import urllib.parse
  35. import urllib.request
  36. from pubmed_bibtex.version import __version__
  37. __all__ = ["__version__", "bibtex_entry_from_pmid"]
  38. _TEXMED_URL_PATTERN = (
  39. "https://www.bioinformatics.org/texmed/cgi-bin/list.cgi?PMID={pmid}&linkOut"
  40. )
  41. _LOGGER = logging.getLogger(__name__)
  42. class _TeXMedHtmlParser(html.parser.HTMLParser):
  43. def __init__(self) -> None:
  44. self.bibtex_entry: typing.Optional[str] = None
  45. super().__init__()
  46. @staticmethod
  47. def _strip_bibtex_entry(data: str) -> str:
  48. return re.sub(r"\n\% \d+\s?\n", "", data).strip() + "\n"
  49. def handle_data(self, data: str) -> None:
  50. if "Author" in data:
  51. self.bibtex_entry = self._strip_bibtex_entry(data)
  52. def error(self, message: str) -> None: # pragma: no cover
  53. # removed in python3.10:
  54. # https://github.com/python/cpython/commit/e34bbfd61f405eef89e8aa50672b0b25022de320
  55. # https://web.archive.org/web/20220326053316/https://bugs.python.org/issue31844
  56. # pylint: disable=no-self-use; python>=3.10
  57. raise Exception(message)
  58. def bibtex_entry_from_pmid(pmid: str, retries: int = 2) -> typing.Optional[str]:
  59. assert pmid.isdigit(), pmid
  60. parser = _TeXMedHtmlParser()
  61. for attempt_index in range(1, retries + 2):
  62. with urllib.request.urlopen( # raises urllib.error.HTTPError
  63. _TEXMED_URL_PATTERN.format(pmid=urllib.parse.quote(pmid))
  64. ) as resp:
  65. parser.feed(resp.read().decode("utf-8"))
  66. if parser.bibtex_entry is None:
  67. _LOGGER.log(
  68. logging.WARNING if attempt_index <= retries else logging.ERROR,
  69. "attempt #%d/%d to fetch bibtex entry failed",
  70. attempt_index,
  71. retries + 1,
  72. )
  73. else:
  74. break
  75. return parser.bibtex_entry