order-confirmation-mail-parser 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # PYTHON_ARGCOMPLETE_OK
  4. import re
  5. import os
  6. import sys
  7. import yaml
  8. import email
  9. import pprint
  10. import random
  11. import locale
  12. import argparse
  13. import datetime
  14. import traceback
  15. import HTMLParser
  16. import argcomplete
  17. # strptime
  18. locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
  19. def parse_amazon(msg):
  20. order = {
  21. 'platform': 'amazon.de',
  22. }
  23. msg_text = msg.get_payload(decode = True)
  24. order['order_id'] = re.search(r'Bestellnummer #(.+)', msg_text).group(1)
  25. order_date = datetime.datetime.strptime(
  26. re.search(r'Aufgegeben am (.+)', msg_text).group(1),
  27. '%d. %B %Y'
  28. )
  29. order['order_date'] = order_date.strftime('%Y-%m-%d')
  30. order['articles'] = []
  31. articles_text = msg_text.split('Bestellte(r) Artikel:')[1].split('_' * 10)[0].strip()
  32. for article_text in articles_text.split('\n\n'):
  33. article_match = re.match(
  34. ur' *(?P<name>.*)\n'
  35. + ur'( *von (?P<authors>.*)\n)?'
  36. + ur' *(?P<price_brutto_currency>[A-Z]+) (?P<price_brutto>\d+,\d+)\n'
  37. + ur'( *Zustand: (?P<state>.*)\n)?'
  38. + ur' *Verkauft von: (?P<reseller>.*)'
  39. + ur'(\n *Versand durch (?P<shipper>.*))?',
  40. article_text,
  41. re.MULTILINE | re.UNICODE
  42. )
  43. if article_match is None:
  44. sys.stderr.write(repr(article_text) + '\n')
  45. raise Exception('could not match article')
  46. article = article_match.groupdict()
  47. if article['authors']:
  48. article['authors'] = article['authors'].split(',')
  49. else:
  50. del article['authors']
  51. article['price_brutto'] = float(article['price_brutto'].replace(',', '.'))
  52. order['articles'].append(article)
  53. return order
  54. def parse_oebb(msg):
  55. msg_text = re.sub(
  56. r'<[^>]+>',
  57. '',
  58. HTMLParser.HTMLParser().unescape(msg.get_payload(decode = True).decode('utf8'))
  59. )
  60. order_match = re.search(
  61. ur'Booking code:\s+(?P<order_id>[\d ]+)\s+'
  62. + ur'Customer number:\s+(?P<customer_id>PV\d+)\s+'
  63. + ur'Booking date:\s+(?P<order_date>.* \d{4})\s',
  64. msg_text,
  65. re.MULTILINE | re.UNICODE
  66. )
  67. order = order_match.groupdict()
  68. order['platform'] = 'oebb.at'
  69. locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
  70. order['order_date'] = datetime.datetime.strptime(
  71. order['order_date'],
  72. '%b %d, %Y'
  73. ).strftime('%Y-%m-%d')
  74. article_match = re.search(
  75. ur'(?P<price_brutto_currency>.)(?P<price_brutto>\d+\.\d+)'
  76. + ur'[\W\w]+'
  77. + ur'Your Booking\s+'
  78. + ur'(?P<departure_point>.*)\s+>\s+(?P<destination_point>.*)',
  79. msg_text,
  80. re.MULTILINE | re.UNICODE
  81. )
  82. article = article_match.groupdict()
  83. article['name'] = 'Train Ticket'
  84. article['price_brutto'] = float(article['price_brutto'])
  85. if article['price_brutto_currency'] == u'€':
  86. article['price_brutto_currency'] = 'EUR'
  87. else:
  88. raise Exception('currency %s is not supported' % article['price_brutto_currency'])
  89. order['articles'] = [article]
  90. return order
  91. def parse(msg):
  92. tracebacks = {}
  93. try:
  94. return parse_amazon(msg)
  95. except:
  96. tracebacks['amazon'] = traceback.format_exc()
  97. try:
  98. return parse_oebb(msg)
  99. except:
  100. tracebacks['oebb'] = traceback.format_exc()
  101. for parser_name in tracebacks:
  102. print('%s parser: \n%s' % (parser_name, tracebacks[parser_name]))
  103. print('failed')
  104. # raise Exception('failed to parse')
  105. def compute():
  106. msg = email.message_from_string(sys.stdin.read())
  107. orders = []
  108. if msg.is_multipart():
  109. for part in msg.get_payload():
  110. orders.append(parse(part))
  111. else:
  112. orders.append(parse(msg))
  113. print(yaml.safe_dump(orders, default_flow_style = False))
  114. def _init_argparser():
  115. argparser = argparse.ArgumentParser(description = None)
  116. return argparser
  117. def main(argv):
  118. argparser = _init_argparser()
  119. argcomplete.autocomplete(argparser)
  120. args = argparser.parse_args(argv)
  121. compute(**vars(args))
  122. return 0
  123. if __name__ == "__main__":
  124. sys.exit(main(sys.argv[1:]))