123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # PYTHON_ARGCOMPLETE_OK
- import re
- import os
- import sys
- import yaml
- import email
- import pprint
- import random
- import locale
- import argparse
- import datetime
- import traceback
- import argcomplete
# datetime.strptime() resolves '%B' month names through the process locale,
# so switch the whole process to German before any order date is parsed.
# NOTE: this fails with locale.Error when 'de_DE.UTF-8' is not installed.
locale.setlocale(
    locale.LC_ALL,
    'de_DE.UTF-8',
)
# German month names as written in amazon.de mails, lower-cased.  Looking
# them up here keeps date parsing independent of the process-wide locale.
_GERMAN_MONTHS = {
    u'januar': 1,
    u'februar': 2,
    u'm\xe4rz': 3,
    u'april': 4,
    u'mai': 5,
    u'juni': 6,
    u'juli': 7,
    u'august': 8,
    u'september': 9,
    u'oktober': 10,
    u'november': 11,
    u'dezember': 12,
}

# One article paragraph: name, optional authors, price, optional condition,
# reseller and optional shipper, each on its own line.
_AMAZON_ARTICLE_RE = re.compile(
    r' *(?P<name>.*)\n'
    r'( *von (?P<authors>.*)\n)?'
    r' *(?P<price_brutto_currency>[A-Z]+) (?P<price_brutto>\d+,\d+)\n'
    r'( *Zustand: (?P<state>.*)\n)?'
    r' *Verkauft von: (?P<reseller>.*)'
    r'(\n *Versand durch (?P<shipper>.*))?',
    re.MULTILINE | re.UNICODE
)


def _parse_german_date(text):
    """Return a datetime.date for a German long date like '3. Dezember 2014'.

    Raises ValueError when the text has another shape, names an unknown
    month or denotes an impossible calendar date.
    """
    match = re.match(r'\s*(\d{1,2})\.\s*(\S+)\s+(\d{4})\s*$', text, re.UNICODE)
    if match is None:
        raise ValueError('unsupported order date %r' % (text,))
    day, month_name, year = match.groups()
    month = _GERMAN_MONTHS.get(month_name.lower())
    if month is None:
        raise ValueError('unknown month name %r' % (month_name,))
    return datetime.date(int(year), month, int(day))


def parse_amazon(msg):
    """Extract an order from an amazon.de order confirmation mail part.

    Args:
        msg: an email.message.Message whose payload is the plain-text
            confirmation (decoded as UTF-8, like parse_oebb does).

    Returns:
        A dict with the keys 'platform', 'order_id', 'order_date'
        (formatted as YYYY-MM-DD) and 'articles'.  Every article is the
        group dict of the article pattern: 'name', 'price_brutto_currency',
        'price_brutto' (float), 'state', 'reseller', 'shipper' (None when
        the optional line is absent) and, only when an author line exists,
        'authors' as a list.

    Raises:
        Exception: an article paragraph does not match the expected layout
            (the offending paragraph is echoed to stderr first).
        AttributeError, IndexError, ValueError, UnicodeError: the payload
            is missing, is not an amazon.de confirmation, or carries an
            unparsable date.
    """
    # Decode once at the boundary: on Python 3 the decoded payload is bytes
    # and the text patterns below cannot be applied to it.
    msg_text = msg.get_payload(decode=True).decode('utf8')

    order = {
        'platform': 'amazon.de',
    }
    order['order_id'] = re.search(r'Bestellnummer #(.+)', msg_text).group(1)

    # Parsed without strptime/%B so the result does not depend on whichever
    # locale another parser may have left active in this process.
    order_date = _parse_german_date(
        re.search(r'Aufgegeben am (.+)', msg_text).group(1)
    )
    order['order_date'] = order_date.isoformat()

    order['articles'] = []
    # The article list sits between its heading and a ruler of underscores;
    # the individual articles are separated by blank lines.
    articles_text = msg_text.split('Bestellte(r) Artikel:')[1].split('_' * 10)[0].strip()
    for article_text in articles_text.split('\n\n'):
        article_match = _AMAZON_ARTICLE_RE.match(article_text)
        if article_match is None:
            sys.stderr.write(repr(article_text) + '\n')
            raise Exception('could not match article')
        article = article_match.groupdict()
        if article['authors']:
            article['authors'] = article['authors'].split(',')
        else:
            del article['authors']
        # German decimal comma -> float.
        article['price_brutto'] = float(article['price_brutto'].replace(',', '.'))
        order['articles'].append(article)
    return order
def parse_oebb(msg):
    """Extract an order from an oebb.at (Austrian railways) booking mail part.

    Args:
        msg: an email.message.Message whose payload is the UTF-8 encoded
            English booking confirmation.

    Returns:
        A dict with the keys 'order_id', 'customer_id', 'order_date'
        (formatted as YYYY-MM-DD), 'platform' and 'articles'.  'articles'
        holds a single dict with 'name', 'price_brutto' (float),
        'price_brutto_currency', 'departure_point' and 'destination_point'.

    Raises:
        Exception: the price is given in a currency other than Euro.
        AttributeError, ValueError, UnicodeError: the payload is missing,
            is not an oebb.at confirmation, or carries an unparsable date.
    """
    msg_text = msg.get_payload(decode=True).decode('utf8')
    order_match = re.search(
        r'Booking code: (?P<order_id>[\d ]+)\s+'
        r'Customer number: (?P<customer_id>PV\d+)\s+'
        r'Booking date: (?P<order_date>.* \d{4})\s',
        msg_text,
        re.MULTILINE | re.UNICODE
    )
    order = order_match.groupdict()
    order['platform'] = 'oebb.at'

    # '%b' needs English month abbreviations.  Switch only LC_TIME, and only
    # for the duration of the call: changing the locale for good would break
    # every later parse that relies on the locale selected at start-up.  The
    # always-available 'C' locale uses the same Jan..Dec abbreviations.
    previous_time_locale = locale.setlocale(locale.LC_TIME)
    locale.setlocale(locale.LC_TIME, 'C')
    try:
        booking_date = datetime.datetime.strptime(
            order['order_date'],
            '%b %d, %Y'
        )
    finally:
        locale.setlocale(locale.LC_TIME, previous_time_locale)
    order['order_date'] = booking_date.strftime('%Y-%m-%d')

    # The first "<symbol><amount>" in the mail is taken as the price; the
    # route is the "A > B" line following the last "Your Booking" heading
    # ([\W\w]+ is greedy and also spans newlines).
    article_match = re.search(
        r'(?P<price_brutto_currency>.)(?P<price_brutto>\d+\.\d+)'
        r'[\W\w]+'
        r'Your Booking\s+'
        r'(?P<departure_point>.*) > (?P<destination_point>.*)',
        msg_text,
        re.MULTILINE | re.UNICODE
    )
    article = article_match.groupdict()
    article['name'] = 'Train Ticket'
    article['price_brutto'] = float(article['price_brutto'])
    if article['price_brutto_currency'] == u'€':
        article['price_brutto_currency'] = 'EUR'
    else:
        raise Exception('currency %s is not supported' % article['price_brutto_currency'])
    order['articles'] = [article]
    return order
def parse(msg):
    """Try every known shop parser on a mail part and return the first hit.

    Args:
        msg: the email.message.Message (or sub-part) to parse.

    Returns:
        The order dict produced by the first parser that succeeds, or None
        when every parser fails.  In the failure case each parser's
        traceback, followed by the line 'failed', is written to stderr.
    """
    parsers = (
        ('amazon', parse_amazon),
        ('oebb', parse_oebb),
    )
    tracebacks = []
    for parser_name, parser in parsers:
        try:
            return parser(msg)
        except Exception:
            # A parser rejects foreign mails by raising; remember why and
            # move on.  KeyboardInterrupt/SystemExit are not swallowed.
            tracebacks.append((parser_name, traceback.format_exc()))

    # Diagnostics go to stderr so they cannot end up inside the YAML
    # document that is printed to stdout.
    for parser_name, formatted in tracebacks:
        sys.stderr.write('%s parser: \n%s\n' % (parser_name, formatted))
    sys.stderr.write('failed\n')
    # Best effort by design: an unparsable part yields None, not an error.
    return None
def compute():
    """Read one e-mail from stdin and print its orders as a YAML list.

    Every non-container part of the message is handed to parse(); a plain
    (non-multipart) message counts as its own single part.  The results are
    kept in message order, including the None that parse() returns for a
    part no parser understands.
    """
    msg = email.message_from_string(sys.stdin.read())
    # walk() also descends into nested multipart containers, whose leaves
    # would otherwise never reach a parser.  The containers themselves carry
    # no text payload and are skipped.
    orders = [
        parse(part)
        for part in msg.walk()
        if not part.is_multipart()
    ]
    print(yaml.safe_dump(orders, default_flow_style=False))
- def _init_argparser():
- argparser = argparse.ArgumentParser(description = None)
- return argparser
def main(argv):
    """Parse the command-line arguments, run compute() and return 0.

    Args:
        argv: the argument list without the program name.
    """
    parser = _init_argparser()
    # Must be called before parse_args() so shell completion can hook in.
    argcomplete.autocomplete(parser)
    options = vars(parser.parse_args(argv))
    compute(**options)
    return 0
if __name__ == "__main__":
    # Run the CLI on the real arguments (program name stripped) and use the
    # value returned by main() as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
|