123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # PYTHON_ARGCOMPLETE_OK
- import dingguo
- import re
- import os
- import sys
- import yaml
- import email
- import shutil
- import pprint
- import random
- import locale
- import argparse
- import datetime
- import traceback
- import subprocess
- import HTMLParser
- import argcomplete
- import BeautifulSoup
- def parse_amazon(msg):
- msg_text = msg.get_payload()[0].get_payload(decode = True).decode('utf-8')
- if not u'Amazon.de Bestellbestätigung' in msg_text:
- raise Exception('no amazon order confirmation')
- orders = []
- for order_text in re.split(ur'={32,}', msg_text)[1:-1]:
- order_id = re.search(r'Bestellnummer #(.+)', order_text).group(1)
- order_date_formatted = re.search(ur'Aufgegeben am (.+)', order_text, re.UNICODE).group(1)
- locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
- order_date = datetime.datetime.strptime(order_date_formatted.encode('utf-8'), '%d. %B %Y')
- order = dingguo.Order(
- u'amazon.de',
- order_id,
- order_date
- )
- articles_text = order_text.split('Bestellte(r) Artikel:')[1].split('_' * 10)[0].strip()
- for article_text in re.split(ur'\n\t*\n', articles_text):
- article_match = re.match(
- ur' *((?P<quantity>\d+) x )?(?P<name>.*)\n'
- + ur'( *von (?P<authors>.*)\n)?'
- + ur' *(?P<price_brutto_currency>[A-Z]+) (?P<price_brutto>\d+,\d+)\n'
- + ur'( *Zustand: (?P<state>.*)\n)?'
- + ur' *Verkauft von: (?P<reseller>.*)'
- + ur'(\n *Versand durch (?P<shipper>.*))?',
- article_text,
- re.MULTILINE | re.UNICODE
- )
- if article_match is None:
- sys.stderr.write(repr(article_text) + '\n')
- raise Exception('could not match article')
- article = article_match.groupdict()
- order.items.append(dingguo.Article(
- name = article['name'],
- price_brutto = dingguo.Sum(
- float(article['price_brutto'].replace(',', '.')),
- article['price_brutto_currency']
- ),
- quantity = int(article['quantity']) if article['quantity'] else 1,
- authors = article['authors'].split(',') if article['authors'] else [],
- state = article['state'],
- reseller = article['reseller'],
- shipper = article['shipper'],
- ))
- orders.append(order)
- return orders
- def parse_oebb(msg):
- msg = msg.get_payload()[0]
- if type(msg.get_payload()) is list:
- msg = msg.get_payload()[0]
- msg_text = msg.get_payload(decode = True).decode('utf8')
- # msg_text = re.sub(
- # r'<[^>]+>',
- # '',
- # HTMLParser.HTMLParser().unescape(msg.get_payload(decode = True).decode('utf8'))
- # )
- order_match = re.search(
- ur'Booking code:\s+(?P<order_id>[\d ]+)\s+'
- + ur'Customer number:\s+(?P<customer_id>PV\d+)\s+'
- + ur'Booking date:\s+(?P<order_date>.* \d{4})\s',
- msg_text,
- re.MULTILINE | re.UNICODE
- )
- order_match_groups = order_match.groupdict()
- locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
- order_date = datetime.datetime.strptime(
- order_match_groups['order_date'],
- '%b %d, %Y'
- )
- order = dingguo.Order(
- u'oebb',
- order_match_groups['order_id'],
- order_date,
- customer_id = order_match_groups['customer_id'],
- )
- item_match = re.search(
- ur'(?P<price_brutto_currency>.)(?P<price_brutto>\d+\.\d+)'
- + ur'[\W\w]+'
- + ur'Your Booking\s+'
- + ur'(?P<departure_point>.*)\s+>\s+(?P<destination_point>.*)',
- msg_text,
- re.MULTILINE | re.UNICODE
- )
- item = item_match.groupdict()
- order.items.append(dingguo.Transportation(
- name = u'Train Ticket',
- price_brutto = dingguo.Sum(
- float(item['price_brutto']),
- item['price_brutto_currency'],
- ),
- departure_point = item['departure_point'],
- destination_point = item['destination_point'],
- ))
- return [order]
- def parse_mytaxi(msg):
- if not 'mytaxi' in msg.get_payload()[0].get_payload()[0].get_payload(decode = True):
- raise Exception('no mytaxi mail')
- pdf_compressed = msg.get_payload()[1].get_payload(decode = True)
- pdftk = subprocess.Popen(
- ['pdftk - output - uncompress'],
- shell = True,
- stdin = subprocess.PIPE,
- stdout = subprocess.PIPE,
- )
- pdf_uncompressed = pdftk.communicate(
- input = pdf_compressed,
- )[0].decode('latin-1')
- assert type(pdf_uncompressed) is unicode
- order_match = re.search(
- ur'Rechnungsnummer:[^\(]+\((?P<order_id>\w+)\)',
- pdf_uncompressed,
- re.MULTILINE | re.UNICODE
- )
- order_id = order_match.groupdict()['order_id']
- ride_match_groups = re.search(
- ur'\(Bruttobetrag\)'
- + ur'[^\(]+'
- + ur'\((?P<price_brutto>\d+,\d+) (?P<price_brutto_currency>.+)\)'
- + ur'[\w\W]+'
- + ur'\((?P<driver>[^\(]+)\)'
- + ur'[^\(]+'
- + ur'\(\d+,\d+ .\)'
- + ur'[^\(]+'
- + ur'\((?P<name>Taxifahrt)'
- + ur'[^\(]+'
- + ur'\(von: (?P<departure_point>[^\)]+)'
- + ur'[^\(]+'
- + ur'\(nach: (?P<destination_point>[^\)]+)'
- + ur'[\w\W]+'
- + ur'Belegdatum \\\(Leistungszeitpunkt\\\):[^\(]+\((?P<arrival_time>\d\d.\d\d.\d\d \d\d:\d\d)\)',
- pdf_uncompressed,
- re.MULTILINE | re.UNICODE
- ).groupdict()
- arrival_time = datetime.datetime.strptime(
- ride_match_groups['arrival_time'],
- '%d.%m.%y %H:%M'
- )
- order = dingguo.Order(
- u'mytaxi',
- order_id,
- arrival_time,
- )
- locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
- order.items.append(dingguo.TaxiRide(
- price_brutto = dingguo.Sum(
- float(ride_match_groups['price_brutto'].replace(',', '.')),
- # why 0x80 ?
- u'EUR' if (ride_match_groups['price_brutto_currency'] == u'\x80')
- else ride_match_groups['price_brutto_currency'],
- ),
- departure_point = ride_match_groups['departure_point'],
- destination_point = ride_match_groups['destination_point'],
- driver = ride_match_groups['driver'],
- arrival_time = arrival_time,
- ))
- return [order]
- def parse_uber(msg):
- html = msg.get_payload()[0].get_payload(decode = True)
- """ document in html2 has the same structure as the one in html.
- only difference is that hyperlink urls in html2 have been
- replaced by 'email.uber.com/wf/click?upn=.*' urls.
- """
- html2 = msg.get_payload()[1].get_payload()[0].get_payload(decode = True)
- route_map = msg.get_payload()[1].get_payload()[1].get_payload(decode = True)
- doc = BeautifulSoup.BeautifulSoup(
- html,
- convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
- )
- # strptime
- locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
- trip_id = re.search(
- ur'[\da-f\-]{36}',
- doc.find(text = 'Visit the trip page').parent['href'],
- ).group(0)
- order = dingguo.Order(
- u'uber',
- trip_id,
- datetime.datetime.strptime(
- doc.find(attrs = {'class': 'date'}).text,
- '%B %d, %Y',
- ),
- )
- departure_time_tag = doc.find(attrs = {'class': 'from time'})
- departure_time = datetime.datetime.strptime(
- departure_time_tag.text,
- '%I:%M%p',
- ).time()
- arrival_time_tag = doc.find(attrs = {'class': 'to time'})
- arrival_time = datetime.datetime.strptime(
- arrival_time_tag.text,
- '%I:%M%p',
- ).time()
- distance = dingguo.Distance(
- float(doc.find(text = 'kilometers').parent.parent.find(attrs = {'class': 'data'}).text),
- u'km',
- )
- fare = doc.find(attrs = {'class': 'header-price'}).find(attrs = {'class': 'header-fare text-pad'}).text
- order.items.append(dingguo.TaxiRide(
- name = doc.find(text = 'CAR').parent.parent.find(attrs = {'class': 'data'}).text + ' Ride',
- price_brutto = dingguo.Sum(float(fare[1:]), fare[0]),
- arrival_time = datetime.datetime.combine(order.order_date, arrival_time),
- departure_time = datetime.datetime.combine(order.order_date, departure_time),
- departure_point = departure_time_tag.parent.find(attrs = {'class': 'address'}).text,
- destination_point = arrival_time_tag.parent.find(attrs = {'class': 'address'}).text,
- distance = distance,
- driver = doc.find(attrs = {'class': 'driver-info'}).text[len('You rode with '):],
- route_map = route_map,
- ))
- return [order]
- def parse_yipbee(msg):
- text = msg.get_payload()[0].get_payload()[0].get_payload(decode = True).decode('utf-8')
- if not u'Vielen Dank für deine Bestellung bei yipbee' in text:
- raise Exception('no yipbee confirmation')
- order_match_groups = re.search(
- ur'[\W\w]+'
- + ur'BESTELLUNG: (?P<order_id>\w+) vom (?P<order_time>\d\d.\d\d.\d{4} \d\d:\d\d:\d\d)'
- + ur'[\W\w]+'
- + ur'GESAMTPREIS\s+'
- + ur'(?P<articles_and_discount_text>[\W\w]+)'
- + ur'(?P<summary_text>ARTIKEL [\W\w]+)',
- text,
- re.UNICODE
- ).groupdict()
- order = dingguo.Order(
- u'yipbee',
- order_match_groups['order_id'],
- datetime.datetime.strptime(order_match_groups['order_time'], '%d.%m.%Y %H:%M:%S'),
- )
- for article_match in re.finditer(
- ur'(?P<name>[\w\-\.\:,%\(\) ]+ (Klasse \d|[\w\-\. ]+[^\d ]))'
- + ur'(?P<total_price>\d+,\d\d) €(?P<quantity>\d)(?P<total_price_2>\d+,\d\d) €',
- order_match_groups['articles_and_discount_text'].replace('\n', ' '),
- re.UNICODE,
- ):
- article_match_groups = article_match.groupdict()
- total_price = float(article_match_groups['total_price'].replace(',', '.'))
- total_price_2 = float(article_match_groups['total_price_2'].replace(',', '.'))
- assert abs(total_price - total_price_2) < 0.01, 'expected %f, received %f' % (total_price, total_price_2)
- quantity = int(article_match_groups['quantity'])
- order.items.append(dingguo.Article(
- name = article_match_groups['name'],
- price_brutto = dingguo.Sum(round(total_price / quantity, 2), u'EUR'),
- quantity = quantity,
- reseller = u'yipbee',
- shipper = u'yipbee',
- ))
- articles_price = float(text.split('RABATTE')[0].split('ARTIKEL')[-1].strip().split(' ')[0].replace(',', '.'))
- assert abs(articles_price - sum([a.price_brutto.value * a.quantity for a in order.items])) < 0.01
- discount_tag = BeautifulSoup.BeautifulSoup(
- order_match_groups['articles_and_discount_text'],
- convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
- ).find('tr')
- if discount_tag:
- name_tag, value_tag = discount_tag.findAll('td', recursive = False)
- value, currency = value_tag.text.split(' ')
- order.discounts.append(dingguo.Discount(
- name = name_tag.text,
- amount = dingguo.Sum(float(value.replace(',', '.')) * -1, currency),
- ))
- delivery_price = order_match_groups['summary_text'].split('VERSAND')[1].split('STEUERN')[0].strip()
- delivery_price_value, delivery_price_currency = delivery_price.split(' ')
- order.items.append(dingguo.Item(
- name = u'Delivery',
- price_brutto = dingguo.Sum(float(delivery_price_value.replace(',', '.')), delivery_price_currency),
- ))
- return [order]
- def parse_yipbee_html(msg):
- html = msg.get_payload()[0].get_payload()[1].get_payload(decode = True)
- if not 'yipbee' in html:
- raise Exception('no yipbee confirmation')
- doc = BeautifulSoup.BeautifulSoup(html, convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES)
- content_table = doc.find('table')
- order_match_groups = re.search(
- ur'Bestellung:(?P<order_id>\w+) vom (?P<order_time>\d\d.\d\d.\d{4} \d\d:\d\d:\d\d)',
- content_table.find('table').findAll('tr')[3].text,
- re.UNICODE
- ).groupdict()
- order = dingguo.Order(
- u'yipbee',
- order_match_groups['order_id'],
- datetime.datetime.strptime(order_match_groups['order_time'], '%d.%m.%Y %H:%M:%S'),
- )
- articles_table = content_table.find('table').find('tbody').findAll('tr', recursive = False)[4].find('table')
- for article_row in articles_table.find('tbody').findAll('tr', recursive = False)[1:]:
- article_columns = article_row.findAll('td', recursive = False)
- (price, currency) = re.sub(ur'\s+', ' ', article_columns[2].text.replace(u',', u'.')).split(' ')
- order.items.append(dingguo.Article(
- name = article_columns[1].text,
- price_brutto = dingguo.Sum(float(price), currency),
- quantity = int(article_columns[3].text),
- reseller = u'yipbee',
- shipper = u'yipbee',
- ))
- discount_row = content_table.find('table').find('tbody').findAll('tr', recursive = False)[6]
- (discount_name, discount_value_with_currency) = [c.text for c in discount_row.findAll('td', recursive = False)]
- (discount_value, discount_currency) = discount_value_with_currency.split(' ')
- order.discounts.append(dingguo.Discount(
- name = discount_name,
- amount = dingguo.Sum(float(discount_value.replace(',', '.')) * -1, discount_currency)
- ))
- shipping_costs_table = content_table.find('tbody').findAll('tr', recursive = False)[3].findAll('table')[1]
- (shipping_price, shipping_currency) = shipping_costs_table.text.replace(',', '.').split(' ')
- order.items.append(dingguo.Item(
- name = u'Delivery',
- price_brutto = dingguo.Sum(float(shipping_price), shipping_currency),
- ))
- return [order]
- def parse_lieferservice(msg):
- text = msg.get_payload()[0].get_payload(decode = True).decode('utf-8').replace('\r\n', '\n')
- assert type(text) is unicode
- if not 'Lieferservice.at' in text:
- raise Exception('no lieferservice.at confirmation')
- order_match = re.search(
- ur'(Your order|Ihre Bestellung) \(.+\) (at|bei) (?P<restaurant>.*)\s+'
- + ur'(Your order reference is|Ihre Bestellnummer lautet): (?P<order_id>.*)\s+'
- + ur'[\W\w]+'
- + ur'(Your order|Ihre Bestellung)\s+'
- + ur'(?P<orders_text>[\W\w]+)'
- + ur'(Delivery costs|Lieferung):\s+(?P<delivery_costs>.*)\s+',
- text,
- re.UNICODE,
- )
- order_match_groups = order_match.groupdict()
- import time
- import email.utils
- order_date = datetime.datetime.fromtimestamp(
- time.mktime(email.utils.parsedate(msg['Date']))
- )
- order = dingguo.Order(
- u'lieferservice.at',
- order_match_groups['order_id'].strip(),
- order_date
- )
- restaurant = order_match_groups['restaurant'].strip('"')
- for article_match in re.finditer(
- ur'(?P<quantity>\d+)x\s'
- + ur'(?P<name>.*)\s'
- + ur'(?P<currency>.) (?P<price>-?\d+,\d+)\s',
- order_match_groups['orders_text'],
- re.UNICODE,
- ):
- article_match_groups = article_match.groupdict()
- quantity = int(article_match_groups['quantity'])
- assert quantity == 1
- name = re.sub(ur' +', ' ', article_match_groups['name'])
- price = dingguo.Sum(
- float(article_match_groups['price'].replace(',', '.')),
- article_match_groups['currency'],
- )
- if price.value < 0:
- price.value *= -1
- order.discounts.append(dingguo.Discount(
- name = name,
- amount = price,
- ))
- else:
- order.items.append(dingguo.Article(
- name = name,
- quantity = 1,
- price_brutto = price,
- reseller = restaurant,
- shipper = restaurant,
- ))
- delivery_costs = order_match_groups['delivery_costs'].strip()
- if delivery_costs in ['FREE', 'GRATIS']:
- order.items.append(dingguo.Item(
- name = u'Delivery',
- price_brutto = dingguo.Sum(0.0, u'EUR'),
- ))
- else:
- unit, value = delivery_costs.split(' ')
- order.items.append(dingguo.Item(
- name = u'Delivery',
- price_brutto = dingguo.Sum(float(value.replace(',', '.')), unit),
- ))
- return [order]
def parse(msg):
    """Try every known mail parser on ``msg`` and return the first result.

    The parsers are tried in a fixed order (amazon, oebb, lieferservice,
    mytaxi, uber, yipbee); the first one that does not raise wins and its
    list of orders is returned. ``parse_yipbee_html`` is not part of the
    chain.

    Raises ``Exception('failed to parse')`` if every parser fails; the
    traceback of each attempt is written to stderr first, in the order in
    which the parsers were tried.
    """
    parsers = (
        ('amazon', parse_amazon),
        ('oebb', parse_oebb),
        ('lieferservice', parse_lieferservice),
        ('mytaxi', parse_mytaxi),
        ('uber', parse_uber),
        ('yipbee', parse_yipbee),
    )

    tracebacks = []
    for parser_name, parser in parsers:
        try:
            return parser(msg)
        except Exception:
            # Only ordinary errors mean "wrong parser, try the next one";
            # KeyboardInterrupt and SystemExit must propagate.
            tracebacks.append((parser_name, traceback.format_exc()))

    for parser_name, parser_traceback in tracebacks:
        sys.stderr.write('%s parser: \n%s\n' % (parser_name, parser_traceback))

    raise Exception('failed to parse')
def compute(mail_path, catalogue, register_path):
    """Parse mails and either register the orders or print them as YAML.

    mail_path: list of mail file paths; if empty, one mail is read from
        stdin instead.
    catalogue: if true, every parsed mail file is copied to
        ``<platform>/<order_id>/`` for each of its orders and the original
        file is removed afterwards. Ignored when reading from stdin.
    register_path: path of a YAML register file. If given, the orders are
        added to it, keyed by platform and order id; otherwise the orders
        are dumped to stdout.

    Raises ``Exception('already registered')`` if an order id is already
    present in the register for its platform.
    """
    orders = []

    if not mail_path:
        orders.extend(parse(email.message_from_string(sys.stdin.read())))
    else:
        for path in mail_path:
            with open(path, 'r') as mail:
                mail_orders = parse(email.message_from_file(mail))
                orders.extend(mail_orders)
            if not catalogue:
                continue
            for order in mail_orders:
                order_dir_path = os.path.join(order.platform, order.order_id)
                if not os.path.isdir(order_dir_path):
                    os.makedirs(order_dir_path)
                shutil.copyfile(path, os.path.join(order_dir_path, os.path.basename(path)))
            # remove the original only after it was copied for every order
            os.remove(path)

    if not register_path:
        print(yaml.safe_dump(orders, default_flow_style = False))
        return

    with open(register_path, 'r') as register:
        # NOTE(review): yaml.load can construct arbitrary objects from the
        # file content; only safe while the register file is trusted.
        registered_orders = yaml.load(register.read().decode('utf-8')) or {}

    for order in orders:
        if order.platform not in registered_orders:
            registered_orders[order.platform] = {}
        platform_orders = registered_orders[order.platform]
        if order.order_id in platform_orders:
            raise Exception('already registered')
        platform_orders[order.order_id] = order

    with open(register_path, 'w') as register:
        register.write(yaml.safe_dump(registered_orders, default_flow_style = False))
- def _init_argparser():
- argparser = argparse.ArgumentParser(description = None)
- argparser.add_argument('--register', metavar = 'path', dest = 'register_path')
- argparser.add_argument('--catalogue', action='store_true')
- argparser.add_argument('mail_path', nargs = '*')
- return argparser
def main(argv):
    """Command-line entry point.

    Parses ``argv`` (the arguments without the program name), with shell
    completion provided by argcomplete, and hands the three options to
    ``compute``. Returns the exit status 0.
    """
    parser = _init_argparser()
    argcomplete.autocomplete(parser)
    options = parser.parse_args(argv)
    compute(
        mail_path = options.mail_path,
        catalogue = options.catalogue,
        register_path = options.register_path,
    )
    return 0
# Run the command-line interface only when executed as a script; the
# return value of main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))
|