|
@@ -0,0 +1,43 @@
|
|
|
|
+import argparse
|
|
|
|
+import html.parser
|
|
|
|
+import re
|
|
|
|
+
|
|
|
|
+import requests
|
|
|
|
+
|
|
|
|
+_TEXMED_URL_PATTERN = 'https://www.bioinformatics.org/texmed/cgi-bin' \
|
|
|
|
+ '/list.cgi?PMID={pmid}&linkOut'
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+class _TeXMedHtmlParser(html.parser.HTMLParser):
|
|
|
|
+
|
|
|
|
+ def __init__(self):
|
|
|
|
+ self.bibtex_entry = None
|
|
|
|
+ super().__init__()
|
|
|
|
+
|
|
|
|
+ @staticmethod
|
|
|
|
+ def _strip_bibtex_entry(data: str) -> str:
|
|
|
|
+ return re.sub(r'\n\% \d+\s?\n', '', data).strip() + '\n'
|
|
|
|
+
|
|
|
|
+ def handle_data(self, data: str) -> None:
|
|
|
|
+ if 'Author' in data:
|
|
|
|
+ self.bibtex_entry = self._strip_bibtex_entry(data)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def bibtex_entry_from_pmid(pmid: str) -> str:
|
|
|
|
+ assert pmid.isdigit(), pmid
|
|
|
|
+ resp = requests.get(_TEXMED_URL_PATTERN.format(pmid=pmid))
|
|
|
|
+ resp.raise_for_status()
|
|
|
|
+ parser = _TeXMedHtmlParser()
|
|
|
|
+ parser.feed(resp.text)
|
|
|
|
+ return parser.bibtex_entry
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def main():
|
|
|
|
+ argparser = argparse.ArgumentParser()
|
|
|
|
+ argparser.add_argument('pmid')
|
|
|
|
+ args = argparser.parse_args()
|
|
|
|
+ print(bibtex_entry_from_pmid(pmid=args.pmid),
|
|
|
|
+ end='')
|
|
|
|
+
|
|
|
|
+if __name__ == '__main__':
|
|
|
|
+ main()
|