#!/usr/bin/env python
# -*- coding: utf-8 -*-
# PYTHON_ARGCOMPLETE_OK
"""Read an order confirmation e-mail from stdin and print the orders as YAML.

Two mail formats are recognised: amazon.de order confirmations (German
plain text) and oebb.at booking confirmations (English HTML).
"""

import re
import os
import sys
import email
import pprint
import random
import locale
import argparse
import datetime
import traceback
import HTMLParser

import yaml
import argcomplete

# strptime('%B') in parse_amazon() has to recognise German month names.
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')


def parse_amazon(msg):
    """Extract the order described by an amazon.de confirmation mail.

    Args:
        msg: message (part) object; its decoded payload is searched as text.

    Returns:
        dict with the keys 'platform', 'order_id', 'order_date'
        (formatted as YYYY-MM-DD) and 'articles' (a list of dicts built
        from the named groups of the article pattern).

    Raises:
        Exception: an article paragraph does not match the article pattern.
        AttributeError, IndexError, TypeError: an expected marker is missing
            from the payload (a search returned None, the article section
            could not be split off, or there is no payload at all).
    """
    order = {
        'platform': 'amazon.de',
    }

    msg_text = msg.get_payload(decode=True)

    order['order_id'] = re.search(r'Bestellnummer #(.+)', msg_text).group(1)

    order_date = datetime.datetime.strptime(
        re.search(r'Aufgegeben am (.+)', msg_text).group(1),
        '%d. %B %Y'
    )
    order['order_date'] = order_date.strftime('%Y-%m-%d')

    order['articles'] = []
    # The article list sits between its heading and a line of underscores;
    # individual articles are separated by blank lines.
    articles_text = (
        msg_text.split('Bestellte(r) Artikel:')[1].split('_' * 10)[0].strip()
    )
    for article_text in articles_text.split('\n\n'):
        # NOTE(review): the group names had been stripped from the pattern in
        # the source as received. 'authors' and 'price_brutto' are fixed by
        # the code below and 'price_brutto_currency' by parse_oebb(); 'name',
        # 'state', 'reseller' and 'shipper' are reconstructed -- confirm them
        # against whatever consumes the YAML output.
        article_match = re.match(
            r' *(?P<name>.*)\n'
            r'( *von (?P<authors>.*)\n)?'
            r' *(?P<price_brutto_currency>[A-Z]+) (?P<price_brutto>\d+,\d+)\n'
            r'( *Zustand: (?P<state>.*)\n)?'
            r' *Verkauft von: (?P<reseller>.*)'
            r'(\n *Versand durch (?P<shipper>.*))?',
            article_text,
            re.MULTILINE | re.UNICODE
        )
        if article_match is None:
            sys.stderr.write(repr(article_text) + '\n')
            raise Exception('could not match article')
        article = article_match.groupdict()
        if article['authors']:
            article['authors'] = article['authors'].split(',')
        else:
            # No "von ..." line: drop the key instead of emitting a null.
            del article['authors']
        # Prices use a decimal comma.
        article['price_brutto'] = float(
            article['price_brutto'].replace(',', '.')
        )
        order['articles'].append(article)

    return order


def parse_oebb(msg):
    """Extract the booking described by an oebb.at confirmation mail.

    Args:
        msg: message (part) object whose decoded payload is UTF-8 encoded
            HTML.

    Returns:
        dict with the named groups of the booking pattern, 'order_date'
        reformatted as YYYY-MM-DD, 'platform' set to 'oebb.at' and
        'articles' holding a single 'Train Ticket' article.

    Raises:
        Exception: the price is given in a currency other than the euro.
        AttributeError: the booking or ticket pattern is not found (the
            search returned None), or there is no payload.
    """
    # Unescape HTML entities, then strip the tags.
    msg_text = re.sub(
        r'<[^>]+>',
        '',
        HTMLParser.HTMLParser().unescape(
            msg.get_payload(decode=True).decode('utf8')
        )
    )

    # NOTE(review): group names were stripped from the source as received.
    # 'order_date' is fixed by the code below; 'order_id' mirrors
    # parse_amazon(); 'customer_id' is reconstructed -- confirm.
    order_match = re.search(
        r'Booking code:\s+(?P<order_id>[\d ]+)\s+'
        r'Customer number:\s+(?P<customer_id>PV\d+)\s+'
        r'Booking date:\s+(?P<order_date>.* \d{4})\s',
        msg_text,
        re.MULTILINE | re.UNICODE
    )
    order = order_match.groupdict()

    order['platform'] = 'oebb.at'

    # The booking date uses English month abbreviations.  Switch the locale
    # only for this conversion and put the previous one back afterwards, so
    # that parse_amazon() still sees German month names when it runs on a
    # later part of the same mail.
    previous_locale = locale.setlocale(locale.LC_ALL)
    locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
    try:
        order_date = datetime.datetime.strptime(
            order['order_date'],
            '%b %d, %Y'
        )
    finally:
        locale.setlocale(locale.LC_ALL, previous_locale)
    order['order_date'] = order_date.strftime('%Y-%m-%d')

    # NOTE(review): 'price_brutto_currency' and 'price_brutto' are fixed by
    # the code below; 'departure_point' and 'destination_point' are
    # reconstructed names -- confirm.
    article_match = re.search(
        r'(?P<price_brutto_currency>.)(?P<price_brutto>\d+\.\d+)'
        r'[\W\w]+'
        r'Your Booking\s+'
        r'(?P<departure_point>.*)\s+>\s+(?P<destination_point>.*)',
        msg_text,
        re.MULTILINE | re.UNICODE
    )
    article = article_match.groupdict()
    article['name'] = 'Train Ticket'
    article['price_brutto'] = float(article['price_brutto'])
    if article['price_brutto_currency'] == u'€':
        article['price_brutto_currency'] = 'EUR'
    else:
        raise Exception(
            'currency %s is not supported' % article['price_brutto_currency']
        )
    order['articles'] = [article]

    return order


def parse(msg):
    """Run the known parsers on msg and return the first successful result.

    The amazon parser is tried first, then the oebb parser.  When both
    fail, their tracebacks and the word 'failed' are written to stderr (so
    the YAML written to stdout stays clean) and None is returned.
    """
    parsers = (
        ('amazon', parse_amazon),
        ('oebb', parse_oebb),
    )
    failures = []
    for parser_name, parser in parsers:
        try:
            return parser(msg)
        except Exception:
            # A parser failing is expected for mails in the other format;
            # keep the traceback in case every parser fails.
            failures.append((parser_name, traceback.format_exc()))

    for parser_name, failure in failures:
        sys.stderr.write('%s parser: \n%s\n' % (parser_name, failure))
    sys.stderr.write('failed\n')
    # raise Exception('failed to parse')
    return None


def compute():
    """Parse the mail on stdin and print the resulting orders as YAML.

    Every part of a multipart mail is parsed on its own.  A part that no
    parser understands contributes None (a YAML null) to the list.
    """
    msg = email.message_from_string(sys.stdin.read())

    if msg.is_multipart():
        parts = msg.get_payload()
    else:
        parts = [msg]
    orders = [parse(part) for part in parts]

    print(yaml.safe_dump(orders, default_flow_style=False))


def _init_argparser():
    """Build the command line parser (no options are defined)."""
    argparser = argparse.ArgumentParser(description=None)
    return argparser


def main(argv):
    """Parse argv, run compute() and return the process exit status (0)."""
    argparser = _init_argparser()
    argcomplete.autocomplete(argparser)
    args = argparser.parse_args(argv)

    compute(**vars(args))

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))