# -*- coding: utf-8 -*-

import datetime
import email
import email.message
import re
import subprocess

import dingguo
import ioex

# Invoice number as it appears in the uncompressed PDF stream: the label is
# followed (after non-"(" filler) by a parenthesised PDF string holding the id.
_ORDER_ID_PATTERN = re.compile(
    r'Rechnungsnummer:[^\(]+\((?P<order_id>\w+)\)',
    re.MULTILINE | re.UNICODE,
)

# Ride details, matched in the order in which they appear in the PDF stream.
# Each value is a parenthesised PDF string; "[^\(]+" skips the operators
# between two consecutive strings.
_RIDE_PATTERN = re.compile(
    r'\(Bruttobetrag\)'
    r'[^\(]+'
    r'\((?P<price_brutto>\d+,\d+) (?P<price_brutto_currency>.+)\)'
    r'[\w\W]+'
    r'\((?P<driver>[^\(]+)\)'
    r'[^\(]+'
    r'\(\d+,\d+ .\)'
    r'[^\(]+'
    # NOTE(review): this group's value is never read below; the group name
    # is a placeholder.
    r'\((?P<name>Taxifahrt)'
    r'[^\(]+'
    r'\(von: (?P<departure_point>[^\)]+)'
    r'[^\(]+'
    r'\(nach: (?P<destination_point>[^\)]+)'
    r'[\w\W]+'
    r'Belegdatum \\\(Leistungszeitpunkt\\\):[^\(]+'
    r'\((?P<arrival_time>\d\d.\d\d.\d\d \d\d:\d\d)\)',
    re.MULTILINE | re.UNICODE,
)

# Format of the timestamp captured by the "arrival_time" group.
_ARRIVAL_TIME_FORMAT = '%d.%m.%y %H:%M'


def _uncompress_pdf(pdf_compressed):
    """Return the output of ``pdftk - output - uncompress`` for the given PDF bytes."""
    # An explicit argument vector avoids going through a shell.
    pdftk = subprocess.Popen(
        ['pdftk', '-', 'output', '-', 'uncompress'],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    return pdftk.communicate(input=pdf_compressed)[0]


def parse_order_confirmation_mail(mail):
    """Parse a mytaxi order confirmation mail into a list of orders.

    The first nested payload part of the mail must contain the text
    ``mytaxi``. The second top-level part is treated as a PDF, uncompressed
    with the external ``pdftk`` tool, and scanned with regular expressions
    for the invoice number and the ride details.

    Args:
        mail: an ``email.message.Message`` whose payload has the structure
            described above.

    Returns:
        A list containing a single ``dingguo.Order`` with one
        ``dingguo.TaxiRide`` item. The order date is the date part of the
        ride's arrival time.

    Raises:
        AssertionError: if ``mail`` is not an ``email.message.Message``.
        Exception: if the mail text does not contain ``mytaxi``.
        AttributeError: if the PDF text does not match the expected patterns.
    """
    assert isinstance(mail, email.message.Message)

    mail_text = mail.get_payload()[0].get_payload()[0].get_payload(decode=True)
    # The decoded payload is a byte string, so compare against bytes.
    if b'mytaxi' not in mail_text:
        raise Exception('no mytaxi mail')

    pdf_compressed = mail.get_payload()[1].get_payload(decode=True)
    # latin-1 maps every byte to a code point, so decoding cannot fail and
    # always yields a text string.
    pdf_uncompressed = _uncompress_pdf(pdf_compressed).decode('latin-1')

    order_id = _ORDER_ID_PATTERN.search(pdf_uncompressed).group('order_id')
    ride_match_groups = _RIDE_PATTERN.search(pdf_uncompressed).groupdict()

    arrival_time = datetime.datetime.strptime(
        ride_match_groups['arrival_time'],
        _ARRIVAL_TIME_FORMAT,
    )

    order = dingguo.Order(
        u'mytaxi',
        order_id,
        order_date=arrival_time.date(),
    )

    with ioex.setlocale('en_US.UTF-8'):
        currency = ride_match_groups['price_brutto_currency']
        # Byte 0x80 is the euro sign in Windows-1252; decoded as latin-1 it
        # becomes U+0080. Presumably the PDF uses such an encoding for the
        # currency symbol -- TODO confirm.
        if currency == u'\x80':
            currency = u'EUR'
        order.items.append(dingguo.TaxiRide(
            price_brutto=dingguo.Sum(
                # The amount uses a decimal comma, e.g. "12,34".
                float(ride_match_groups['price_brutto'].replace(',', '.')),
                currency,
            ),
            departure_point=ride_match_groups['departure_point'],
            destination_point=ride_match_groups['destination_point'],
            driver=ride_match_groups['driver'],
            arrival_time=arrival_time,
        ))

    return [order]