| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562 | 
							- #!/usr/bin/env python
 
- # -*- coding: utf-8 -*-
 
- # PYTHON_ARGCOMPLETE_OK
 
- import dingguo
 
- import re
 
- import os
 
- import sys
 
- import yaml
 
- import email
 
- import shutil
 
- import pprint
 
- import random
 
- import locale
 
- import argparse
 
- import datetime
 
- import traceback
 
- import subprocess
 
- import HTMLParser
 
- import argcomplete
 
- import BeautifulSoup
 
- def parse_amazon(msg):
 
-     msg_text = msg.get_payload()[0].get_payload(decode = True).decode('utf-8')
 
-     if not u'Amazon.de Bestellbestätigung' in msg_text:
 
-         raise Exception('no amazon order confirmation')
 
-     orders = []
 
-     for order_text in re.split(ur'={32,}', msg_text)[1:-1]:
 
-         order_id = re.search(r'Bestellnummer #(.+)', order_text).group(1)
 
-         order_date_formatted = re.search(ur'Aufgegeben am (.+)', order_text, re.UNICODE).group(1)
 
-         locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
 
-         order_date = datetime.datetime.strptime(order_date_formatted.encode('utf-8'), '%d. %B %Y')
 
-         order = dingguo.Order(
 
-             u'amazon.de',
 
-             order_id,
 
-             order_date
 
-             )
 
-         articles_text = order_text.split('Bestellte(r) Artikel:')[1].split('_' * 10)[0].strip()
 
-         for article_text in re.split(ur'\n\t*\n', articles_text):
 
-             article_match = re.match(
 
-                 ur' *((?P<quantity>\d+) x )?(?P<name>.*)\n'
 
-                     + ur'( *von (?P<authors>.*)\n)?'
 
-                     + ur' *(?P<price_brutto_currency>[A-Z]+) (?P<price_brutto>\d+,\d+)\n'
 
-                     + ur'( *Zustand: (?P<state>.*)\n)?'
 
-                     + ur' *Verkauft von: (?P<reseller>.*)'
 
-                     + ur'(\n *Versand durch (?P<shipper>.*))?',
 
-                 article_text,
 
-                 re.MULTILINE | re.UNICODE
 
-                 )
 
-             if article_match is None:
 
-                 sys.stderr.write(repr(article_text) + '\n')
 
-                 raise Exception('could not match article')
 
-             article = article_match.groupdict()
 
-             order.items.append(dingguo.Article(
 
-                 name = article['name'],
 
-                 price_brutto = dingguo.Sum(
 
-                     float(article['price_brutto'].replace(',', '.')),
 
-                     article['price_brutto_currency']
 
-                     ),
 
-                 quantity = int(article['quantity']) if article['quantity'] else 1,
 
-                 authors = article['authors'].split(',') if article['authors'] else [],
 
-                 state = article['state'],
 
-                 reseller = article['reseller'],
 
-                 shipper = article['shipper'],
 
-                 ))
 
-         orders.append(order)
 
-     return orders
 
- def parse_oebb(msg):
 
-     msg = msg.get_payload()[0]
 
-     if type(msg.get_payload()) is list:
 
-         msg = msg.get_payload()[0]
 
-     msg_text = msg.get_payload(decode = True).decode('utf8')
 
-     # msg_text = re.sub(
 
-     #     r'<[^>]+>',
 
-     #     '',
 
-     #     HTMLParser.HTMLParser().unescape(msg.get_payload(decode = True).decode('utf8'))
 
-     #     )
 
-     order_match = re.search(
 
-         ur'Booking code:\s+(?P<order_id>[\d ]+)\s+'
 
-             + ur'Customer number:\s+(?P<customer_id>PV\d+)\s+'
 
-             + ur'Booking date:\s+(?P<order_date>.* \d{4})\s',
 
-         msg_text,
 
-         re.MULTILINE | re.UNICODE
 
-         )
 
-     order_match_groups = order_match.groupdict()
 
-     locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
 
-     order_date = datetime.datetime.strptime(
 
-         order_match_groups['order_date'],
 
-         '%b %d, %Y'
 
-         )
 
-     order = dingguo.Order(
 
-         u'oebb',
 
-         order_match_groups['order_id'],
 
-         order_date,
 
-         customer_id = order_match_groups['customer_id'],
 
-         )
 
-     item_match = re.search(
 
-         ur'(?P<price_brutto_currency>.)(?P<price_brutto>\d+\.\d+)'
 
-             + ur'[\W\w]+'
 
-             + ur'Your Booking\s+'
 
-             + ur'(?P<departure_point>.*)\s+>\s+(?P<destination_point>.*)',
 
-         msg_text,
 
-         re.MULTILINE | re.UNICODE
 
-         )
 
-     item = item_match.groupdict()
 
-     order.items.append(dingguo.Transportation(
 
-         name = u'Train Ticket',
 
-         price_brutto = dingguo.Sum(
 
-             float(item['price_brutto']),
 
-             item['price_brutto_currency'],
 
-             ),
 
-         departure_point = item['departure_point'],
 
-         destination_point = item['destination_point'],
 
-         ))
 
-     return [order]
 
- def parse_mytaxi(msg):
 
-     if not 'mytaxi' in msg.get_payload()[0].get_payload()[0].get_payload(decode = True):
 
-         raise Exception('no mytaxi mail')
 
-     pdf_compressed = msg.get_payload()[1].get_payload(decode = True)
 
-     pdftk = subprocess.Popen(
 
-             ['pdftk - output - uncompress'],
 
-             shell = True,
 
-             stdin = subprocess.PIPE,
 
-             stdout = subprocess.PIPE,
 
-             )
 
-     pdf_uncompressed = pdftk.communicate(
 
-         input = pdf_compressed,
 
-         )[0].decode('latin-1')
 
-     assert type(pdf_uncompressed) is unicode
 
-     order_match = re.search(
 
-         ur'Rechnungsnummer:[^\(]+\((?P<order_id>\w+)\)',
 
-         pdf_uncompressed,
 
-         re.MULTILINE | re.UNICODE
 
-         )
 
-     order_id = order_match.groupdict()['order_id']
 
-     ride_match_groups = re.search(
 
-         ur'\(Bruttobetrag\)'
 
-             + ur'[^\(]+'
 
-             + ur'\((?P<price_brutto>\d+,\d+) (?P<price_brutto_currency>.+)\)'
 
-             + ur'[\w\W]+'
 
-             + ur'\((?P<driver>[^\(]+)\)'
 
-             + ur'[^\(]+'
 
-             + ur'\(\d+,\d+ .\)'
 
-             + ur'[^\(]+'
 
-             + ur'\((?P<name>Taxifahrt)'
 
-             + ur'[^\(]+'
 
-             + ur'\(von: (?P<departure_point>[^\)]+)'
 
-             + ur'[^\(]+'
 
-             + ur'\(nach: (?P<destination_point>[^\)]+)'
 
-             + ur'[\w\W]+'
 
-             + ur'Belegdatum \\\(Leistungszeitpunkt\\\):[^\(]+\((?P<arrival_time>\d\d.\d\d.\d\d \d\d:\d\d)\)',
 
-         pdf_uncompressed,
 
-         re.MULTILINE | re.UNICODE
 
-         ).groupdict()
 
-     arrival_time = datetime.datetime.strptime(
 
-         ride_match_groups['arrival_time'],
 
-         '%d.%m.%y %H:%M'
 
-         )
 
-     order = dingguo.Order(
 
-         u'mytaxi',
 
-         order_id,
 
-         arrival_time,
 
-         )
 
-     locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
 
-     order.items.append(dingguo.TaxiRide(
 
-         price_brutto = dingguo.Sum(
 
-             float(ride_match_groups['price_brutto'].replace(',', '.')),
 
-             # why 0x80 ?
 
-             u'EUR' if (ride_match_groups['price_brutto_currency'] == u'\x80')
 
-                 else ride_match_groups['price_brutto_currency'],
 
-             ),
 
-         departure_point = ride_match_groups['departure_point'],
 
-         destination_point = ride_match_groups['destination_point'],
 
-         driver = ride_match_groups['driver'],
 
-         arrival_time = arrival_time,
 
-         ))
 
-     return [order]
 
- def parse_uber(msg):
 
-     html = msg.get_payload()[0].get_payload(decode = True)
 
-     """ document in html2 has the same structure as the one in html.
 
-         only difference is that hyperlink urls in html2 have been
 
-         replaced by 'email.uber.com/wf/click?upn=.*' urls.
 
-         """
 
-     html2 = msg.get_payload()[1].get_payload()[0].get_payload(decode = True)
 
-     route_map = msg.get_payload()[1].get_payload()[1].get_payload(decode = True)
 
-     doc = BeautifulSoup.BeautifulSoup(
 
-         html,
 
-         convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
 
-         )
 
-     # strptime
 
-     locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
 
-     trip_id = re.search(
 
-         ur'[\da-f\-]{36}',
 
-         doc.find(text = 'Visit the trip page').parent['href'],
 
-         ).group(0)
 
-     order = dingguo.Order(
 
-         u'uber',
 
-         trip_id,
 
-         datetime.datetime.strptime(
 
-             doc.find(attrs = {'class': 'date'}).text,
 
-             '%B %d, %Y',
 
-             ),
 
-         )
 
-     departure_time_tag = doc.find(attrs = {'class': 'from time'})
 
-     departure_time = datetime.datetime.strptime(
 
-         departure_time_tag.text,
 
-         '%I:%M%p',
 
-         ).time()
 
-     arrival_time_tag = doc.find(attrs = {'class': 'to time'})
 
-     arrival_time = datetime.datetime.strptime(
 
-         arrival_time_tag.text,
 
-         '%I:%M%p',
 
-         ).time()
 
-     distance = dingguo.Distance(
 
-         float(doc.find(text = 'kilometers').parent.parent.find(attrs = {'class': 'data'}).text),
 
-         u'km',
 
-         )
 
-     fare = doc.find(attrs = {'class': 'header-price'}).find(attrs = {'class': 'header-fare text-pad'}).text
 
-     order.items.append(dingguo.TaxiRide(
 
-         name = doc.find(text = 'CAR').parent.parent.find(attrs = {'class': 'data'}).text + ' Ride',
 
-         price_brutto = dingguo.Sum(float(fare[1:]), fare[0]),
 
-         arrival_time = datetime.datetime.combine(order.order_date, arrival_time),
 
-         departure_time = datetime.datetime.combine(order.order_date, departure_time),
 
-         departure_point = departure_time_tag.parent.find(attrs = {'class': 'address'}).text,
 
-         destination_point = arrival_time_tag.parent.find(attrs = {'class': 'address'}).text,
 
-         distance = distance,
 
-         driver = doc.find(attrs = {'class': 'driver-info'}).text[len('You rode with '):],
 
-         route_map = route_map,
 
-         ))
 
-     return [order]
 
- def parse_yipbee(msg):
 
-     text = msg.get_payload()[0].get_payload()[0].get_payload(decode = True).decode('utf-8')
 
-     if not u'Vielen Dank für deine Bestellung bei yipbee' in text:
 
-         raise Exception('no yipbee confirmation')
 
-     order_match_groups = re.search(
 
-         ur'[\W\w]+'
 
-             + ur'BESTELLUNG: (?P<order_id>\w+) vom (?P<order_time>\d\d.\d\d.\d{4} \d\d:\d\d:\d\d)'
 
-             + ur'[\W\w]+'
 
-             + ur'GESAMTPREIS\s+'
 
-             + ur'(?P<articles_and_discount_text>[\W\w]+)'
 
-             + ur'(?P<summary_text>ARTIKEL [\W\w]+)',
 
-         text,
 
-         re.UNICODE
 
-         ).groupdict()
 
-     order = dingguo.Order(
 
-         u'yipbee',
 
-         order_match_groups['order_id'],
 
-         datetime.datetime.strptime(order_match_groups['order_time'], '%d.%m.%Y %H:%M:%S'),
 
-         )
 
-     for article_match in re.finditer(
 
-             ur'(?P<name>[\w\-\.\:,%\(\) ]+ (Klasse \d|[\w\-\. ]+[^\d ]))'
 
-                 + ur'(?P<total_price>\d+,\d\d) €(?P<quantity>\d)(?P<total_price_2>\d+,\d\d) €',
 
-             order_match_groups['articles_and_discount_text'].replace('\n', ' '),
 
-             re.UNICODE,
 
-             ):
 
-         article_match_groups = article_match.groupdict()
 
-         total_price = float(article_match_groups['total_price'].replace(',', '.'))
 
-         total_price_2 = float(article_match_groups['total_price_2'].replace(',', '.'))
 
-         assert abs(total_price - total_price_2) < 0.01, 'expected %f, received %f' % (total_price, total_price_2)
 
-         quantity = int(article_match_groups['quantity'])
 
-         order.items.append(dingguo.Article(
 
-             name = article_match_groups['name'],
 
-             price_brutto = dingguo.Sum(round(total_price / quantity, 2), u'EUR'),
 
-             quantity = quantity,
 
-             reseller = u'yipbee',
 
-             shipper = u'yipbee',
 
-             ))
 
-     articles_price = float(text.split('RABATTE')[0].split('ARTIKEL')[-1].strip().split(' ')[0].replace(',', '.'))
 
-     assert abs(articles_price - sum([a.price_brutto.value * a.quantity for a in order.items])) < 0.01
 
-     discount_tag = BeautifulSoup.BeautifulSoup(
 
-         order_match_groups['articles_and_discount_text'],
 
-         convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
 
-         ).find('tr')
 
-     if discount_tag:
 
-         name_tag, value_tag = discount_tag.findAll('td', recursive = False)
 
-         value, currency = value_tag.text.split(' ')
 
-         order.discounts.append(dingguo.Discount(
 
-             name = name_tag.text,
 
-             amount = dingguo.Sum(float(value.replace(',', '.')) * -1, currency),
 
-             ))
 
-     delivery_price = order_match_groups['summary_text'].split('VERSAND')[1].split('STEUERN')[0].strip()
 
-     delivery_price_value, delivery_price_currency = delivery_price.split(' ')
 
-     order.items.append(dingguo.Item(
 
-         name = u'Delivery',
 
-         price_brutto = dingguo.Sum(float(delivery_price_value.replace(',', '.')), delivery_price_currency),
 
-         ))
 
-     return [order]
 
- def parse_yipbee_html(msg):
 
-     html = msg.get_payload()[0].get_payload()[1].get_payload(decode = True)
 
-     if not 'yipbee' in html:
 
-         raise Exception('no yipbee confirmation')
 
-     doc = BeautifulSoup.BeautifulSoup(html, convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES)
 
-     content_table = doc.find('table')
 
-     order_match_groups = re.search(
 
-         ur'Bestellung:(?P<order_id>\w+) vom (?P<order_time>\d\d.\d\d.\d{4} \d\d:\d\d:\d\d)',
 
-         content_table.find('table').findAll('tr')[3].text,
 
-         re.UNICODE
 
-         ).groupdict()
 
-     order = dingguo.Order(
 
-         u'yipbee',
 
-         order_match_groups['order_id'],
 
-         datetime.datetime.strptime(order_match_groups['order_time'], '%d.%m.%Y %H:%M:%S'),
 
-         )
 
-     articles_table = content_table.find('table').find('tbody').findAll('tr', recursive = False)[4].find('table')
 
-     for article_row in articles_table.find('tbody').findAll('tr', recursive = False)[1:]:
 
-         article_columns = article_row.findAll('td', recursive = False)
 
-         (price, currency) = re.sub(ur'\s+', ' ', article_columns[2].text.replace(u',', u'.')).split(' ')
 
-         order.items.append(dingguo.Article(
 
-             name = article_columns[1].text,
 
-             price_brutto = dingguo.Sum(float(price), currency),
 
-             quantity = int(article_columns[3].text),
 
-             reseller = u'yipbee',
 
-             shipper = u'yipbee',
 
-             ))
 
-     discount_row = content_table.find('table').find('tbody').findAll('tr', recursive = False)[6]
 
-     (discount_name, discount_value_with_currency) = [c.text for c in discount_row.findAll('td', recursive = False)]
 
-     (discount_value, discount_currency) = discount_value_with_currency.split(' ')
 
-     order.discounts.append(dingguo.Discount(
 
-         name = discount_name,
 
-         amount = dingguo.Sum(float(discount_value.replace(',', '.')) * -1, discount_currency)
 
-         ))
 
-     shipping_costs_table = content_table.find('tbody').findAll('tr', recursive = False)[3].findAll('table')[1]
 
-     (shipping_price, shipping_currency) = shipping_costs_table.text.replace(',', '.').split(' ')
 
-     order.items.append(dingguo.Item(
 
-         name = u'Delivery',
 
-         price_brutto = dingguo.Sum(float(shipping_price), shipping_currency),
 
-         ))
 
-     return [order]
 
- def parse_lieferservice(msg):
 
-     text = msg.get_payload()[0].get_payload(decode = True).decode('utf-8').replace('\r\n', '\n')
 
-     assert type(text) is unicode
 
-     if not 'Lieferservice.at' in text:
 
-         raise Exception('no lieferservice.at confirmation')
 
-     order_match = re.search(
 
-             ur'(Your order|Ihre Bestellung) \(.+\) (at|bei) (?P<restaurant>.*)\s+'
 
-                 + ur'(Your order reference is|Ihre Bestellnummer lautet): (?P<order_id>.*)\s+'
 
-                 + ur'[\W\w]+'
 
-                 + ur'(Your order|Ihre Bestellung)\s+'
 
-                 + ur'(?P<orders_text>[\W\w]+)'
 
-                 + ur'(Delivery costs|Lieferung):\s+(?P<delivery_costs>.*)\s+',
 
-             text,
 
-             re.UNICODE,
 
-             )
 
-     order_match_groups = order_match.groupdict()
 
-     import time
 
-     import email.utils
 
-     order_date = datetime.datetime.fromtimestamp(
 
-             time.mktime(email.utils.parsedate(msg['Date']))
 
-             )
 
-     order = dingguo.Order(
 
-         u'lieferservice.at',
 
-         order_match_groups['order_id'].strip(),
 
-         order_date
 
-         )
 
-     restaurant = order_match_groups['restaurant'].strip('"')
 
-     for article_match in re.finditer(
 
-             ur'(?P<quantity>\d+)x\s'
 
-                 + ur'(?P<name>.*)\s'
 
-                 + ur'(?P<currency>.) (?P<price>-?\d+,\d+)\s',
 
-             order_match_groups['orders_text'],
 
-             re.UNICODE,
 
-             ):
 
-         article_match_groups = article_match.groupdict()
 
-         quantity = int(article_match_groups['quantity'])
 
-         assert quantity == 1
 
-         name = re.sub(ur' +', ' ', article_match_groups['name'])
 
-         price = dingguo.Sum(
 
-             float(article_match_groups['price'].replace(',', '.')),
 
-             article_match_groups['currency'],
 
-             )
 
-         if price.value < 0:
 
-             price.value *= -1
 
-             order.discounts.append(dingguo.Discount(
 
-                 name = name,
 
-                 amount = price,
 
-                 ))
 
-         else:
 
-             order.items.append(dingguo.Article(
 
-                 name = name,
 
-                 quantity = 1,
 
-                 price_brutto = price,
 
-                 reseller = restaurant,
 
-                 shipper = restaurant,
 
-                 ))
 
-     delivery_costs = order_match_groups['delivery_costs'].strip()
 
-     if delivery_costs in ['FREE', 'GRATIS']:
 
-         order.items.append(dingguo.Item(
 
-             name = u'Delivery',
 
-             price_brutto = dingguo.Sum(0.0, u'EUR'),
 
-             ))
 
-     else:
 
-         unit, value = delivery_costs.split(' ')
 
-         order.items.append(dingguo.Item(
 
-             name = u'Delivery',
 
-             price_brutto = dingguo.Sum(float(value.replace(',', '.')), unit),
 
-             ))
 
-     return [order]
 
def parse(msg):
    """Try every known mail parser on ``msg`` and return the first result.

    The parsers are tried in a fixed order (amazon, oebb, lieferservice,
    mytaxi, uber, yipbee); the first one that returns without raising
    wins and its list of orders is returned.

    If every parser fails, the traceback of each attempt is written to
    stderr in the order the parsers were tried and
    Exception('failed to parse') is raised.

    NOTE(review): parse_yipbee_html is not part of this chain.
    """
    # Resolved at call time so the order of definitions in the module
    # does not matter.
    parsers = (
        ('amazon', parse_amazon),
        ('oebb', parse_oebb),
        ('lieferservice', parse_lieferservice),
        ('mytaxi', parse_mytaxi),
        ('uber', parse_uber),
        ('yipbee', parse_yipbee),
        )

    failures = []
    for parser_name, parser in parsers:
        try:
            return parser(msg)
        except Exception:
            # Deliberately broad: a parser signals "not my kind of mail" by
            # raising almost anything.  KeyboardInterrupt and SystemExit
            # are no longer swallowed, unlike with a bare ``except``.
            failures.append((parser_name, traceback.format_exc()))

    for parser_name, trace in failures:
        sys.stderr.write('%s parser: \n%s\n' % (parser_name, trace))
    raise Exception('failed to parse')
 
def compute(mail_path, catalogue, register_path):
    """Parse mails into orders and either register or print them.

    mail_path     -- sequence of mail file paths; when empty or None a
                     single mail is read from stdin instead.
    catalogue     -- when true, every parsed mail file is copied to
                     '<platform>/<order_id>/<file name>' (relative to the
                     current directory) for each order it contains and the
                     original file is then removed.
    register_path -- path of a YAML register mapping platform -> order id
                     -> order.  When given, the new orders are merged into
                     it; otherwise the orders are printed as YAML.

    Raises Exception('already registered') when an order id is already
    present in the register for the same platform, plus whatever parse()
    and the file operations raise.
    """
    orders = []
    if mail_path:
        for p in mail_path:
            with open(p, 'r') as mail:
                mail_orders = parse(email.message_from_file(mail))
            orders += mail_orders
            # Only remove the mail once it has been copied somewhere: a mail
            # that produced no orders would otherwise be deleted for good.
            if catalogue and mail_orders:
                for order in mail_orders:
                    order_dir_path = os.path.join(order.platform, order.order_id)
                    if not os.path.isdir(order_dir_path):
                        os.makedirs(order_dir_path)
                    shutil.copyfile(p, os.path.join(order_dir_path, os.path.basename(p)))
                os.remove(p)
    else:
        msg = email.message_from_string(sys.stdin.read())
        orders += parse(msg)

    if not register_path:
        print(yaml.safe_dump(orders, default_flow_style = False))
        return

    with open(register_path, 'r') as register:
        # NOTE(review): yaml.load can construct arbitrary Python objects.
        # Presumably it is needed to restore the dingguo types; only use
        # register files you trust.
        registered_orders = yaml.load(register.read().decode('utf-8'))
    if not registered_orders:
        registered_orders = {}

    for order in orders:
        platform_orders = registered_orders.setdefault(order.platform, {})
        if order.order_id in platform_orders:
            raise Exception('already registered')
        platform_orders[order.order_id] = order

    # Serialise before opening the file for writing: opening with 'w'
    # truncates it, so a failing dump would otherwise wipe the register.
    serialized = yaml.safe_dump(registered_orders, default_flow_style = False)
    with open(register_path, 'w') as register:
        register.write(serialized)
 
- def _init_argparser():
 
-     argparser = argparse.ArgumentParser(description = None)
 
-     argparser.add_argument('--register', metavar = 'path', dest = 'register_path')
 
-     argparser.add_argument('--catalogue', action='store_true')
 
-     argparser.add_argument('mail_path', nargs = '*')
 
-     return argparser
 
def main(argv):
    """Command line entry point.

    ``argv`` is the argument list without the program name.  The parsed
    options are passed to compute() as keyword arguments.  Returns 0.
    """
    parser = _init_argparser()
    # hook for shell tab completion; must run before parse_args
    argcomplete.autocomplete(parser)
    options = vars(parser.parse_args(argv))
    compute(**options)
    return 0
 
if __name__ == "__main__":
    # Run only when executed as a script: pass the arguments without the
    # program name and use main()'s return value as the exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
 
 
  |