Browse Source

retry if server's response does not include bibtex entry

Fabian Peter Hammerle 2 years ago
parent
commit
bb464bdeaf
4 changed files with 36 additions and 7 deletions
  1. +2 -0
      CHANGELOG.md
  2. +18 -5
      pubmed_bibtex/__init__.py
  3. +14 -0
      tests/test_bibtex_entry_from_pmid.py
  4. +2 -2
      tests/test_main.py

+ 2 - 0
CHANGELOG.md

@@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
+### Fixed
+- retry if server's response does not include bibtex entry
 
 ## [1.0.0] - 2022-01-23
 ### Added

+ 18 - 5
pubmed_bibtex/__init__.py

@@ -37,6 +37,7 @@ along with this program.  If not, see <https://www.gnu.org/licenses/>.
 """
 
 import html.parser
+import logging
 import re
 import typing
 import urllib.parse
@@ -50,6 +51,8 @@ _TEXMED_URL_PATTERN = (
     "https://www.bioinformatics.org/texmed/cgi-bin/list.cgi?PMID={pmid}&linkOut"
 )
 
+_LOGGER = logging.getLogger(__name__)
+
 
 class _TeXMedHtmlParser(html.parser.HTMLParser):
     def __init__(self) -> None:
@@ -69,11 +72,21 @@ class _TeXMedHtmlParser(html.parser.HTMLParser):
         raise Exception(message)  # pragma: no cover
 
 
-def bibtex_entry_from_pmid(pmid: str) -> typing.Optional[str]:
+def bibtex_entry_from_pmid(pmid: str, retries: int = 2) -> typing.Optional[str]:
     assert pmid.isdigit(), pmid
     parser = _TeXMedHtmlParser()
-    with urllib.request.urlopen(  # raises urllib.error.HTTPError
-        _TEXMED_URL_PATTERN.format(pmid=urllib.parse.quote(pmid))
-    ) as resp:
-        parser.feed(resp.read().decode("utf-8"))
+    for attempt_index in range(1, retries + 2):
+        with urllib.request.urlopen(  # raises urllib.error.HTTPError
+            _TEXMED_URL_PATTERN.format(pmid=urllib.parse.quote(pmid))
+        ) as resp:
+            parser.feed(resp.read().decode("utf-8"))
+        if parser.bibtex_entry is None:
+            _LOGGER.log(
+                logging.WARNING if attempt_index <= retries else logging.ERROR,
+                "attempt #%d/%d to fetch bibtex entry failed",
+                attempt_index,
+                retries + 1,
+            )
+        else:
+            break
     return parser.bibtex_entry

+ 14 - 0
tests/test_bibtex_entry_from_pmid.py

@@ -1,3 +1,4 @@
+import logging
 import unittest.mock
 import urllib.error
 
@@ -21,3 +22,16 @@ def test_bibtex_entry_from_pmid() -> None:
 def test_bibtex_entry_from_pmid_not_found() -> None:
     with pytest.raises(urllib.error.HTTPError, match=r"^HTTP Error 404: Not Found$"):
         pubmed_bibtex.bibtex_entry_from_pmid(pmid=TEST_PMID)
+
+
+def test_bibtex_entry_from_pmid_retry(caplog) -> None:
+    with unittest.mock.patch(
+        "pubmed_bibtex._TeXMedHtmlParser.feed"
+    ) as feed_mock, caplog.at_level(logging.WARNING):
+        assert pubmed_bibtex.bibtex_entry_from_pmid(pmid=TEST_PMID, retries=2) is None
+    assert feed_mock.call_count == 3
+    assert caplog.record_tuples == [
+        ("pubmed_bibtex", logging.WARNING, "attempt #1/3 to fetch bibtex entry failed"),
+        ("pubmed_bibtex", logging.WARNING, "attempt #2/3 to fetch bibtex entry failed"),
+        ("pubmed_bibtex", logging.ERROR, "attempt #3/3 to fetch bibtex entry failed"),
+    ]

+ 2 - 2
tests/test_main.py

@@ -25,7 +25,7 @@ def test_script_module() -> None:
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
     )
-    assert not proc_info.stderr
+    assert all(l.startswith(b"attempt #") for l in proc_info.stderr.splitlines())
     assert proc_info.stdout == TEST_BIBTEX_ENTRY.encode()
 
 
@@ -36,7 +36,7 @@ def test_script() -> None:
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
     )
-    assert not proc_info.stderr
+    assert all(l.startswith(b"attempt #") for l in proc_info.stderr.splitlines())
     assert proc_info.stdout == TEST_BIBTEX_ENTRY.encode()