Browse Source

derive from YAMLObject instead of defining representers

Fabian Peter Hammerle 8 years ago
parent
commit
0759d77c08

+ 45 - 9
dingguo/__init__.py

@@ -6,16 +6,22 @@ import datetime
 
 yaml.Dumper.add_representer(unicode, yaml.representer.SafeRepresenter.represent_unicode)
 
-class _YamlUnicodeConstruct(yaml.YAMLObject):
+yaml.Loader.add_constructor(
+    u'tag:yaml.org,2002:str',
+    lambda loader, node: unicode(loader.construct_scalar(node)),
+    )
+
+class _YamlInitConstructor(yaml.YAMLObject):
 
     @classmethod
     def from_yaml(cls, loader, node):
-        return cls(**{
-            k: unicode(v) if isinstance(v, str) else v
-                for (k, v) in loader.construct_mapping(node, deep = True).items()
-            })
+        return cls(**loader.construct_mapping(node, deep = True))
+        # return cls(**{
+        #     k: unicode(v) if isinstance(v, str) else v
+        #         for (k, v) in loader.construct_mapping(node, deep = True).items()
+        #     })
 
-class Figure(_YamlUnicodeConstruct):
+class Figure(_YamlInitConstructor):
 
     yaml_tag = u"!figure"
 
@@ -159,14 +165,15 @@ class Discount(yaml.YAMLObject):
                 )
 
     def __eq__(self, other):
-        return type(self) == type(other) and self.name == other.name and self.amount == other.amount
+        return (type(self) == type(other)
+                and vars(self) == vars(other))
 
     def __ne__(self, other):
         return not (self == other)
 
 yaml.SafeDumper.add_representer(Discount, lambda dumper, discount: dumper.represent_dict(discount.dict_repr()))
 
-class Order(_YamlUnicodeConstruct):
+class Order(_YamlInitConstructor):
 
     yaml_tag = u'!order'
 
@@ -240,7 +247,7 @@ class Order(_YamlUnicodeConstruct):
 
 yaml.SafeDumper.add_representer(Order, lambda dumper, order: dumper.represent_dict(order.dict_repr()))
 
-class Item(_YamlUnicodeConstruct):
+class Item(_YamlInitConstructor):
 
     yaml_tag = u'!item'
 
@@ -400,3 +407,32 @@ class TaxiRide(Transportation):
         return attr
 
 yaml.SafeDumper.add_representer(TaxiRide, lambda dumper, taxi_ride: dumper.represent_dict(taxi_ride.dict_repr()))
+
+class OrderRegistry(yaml.YAMLObject):
+
+    yaml_tag = u'!order-registry'
+
+    def __init__(self):
+        self.registry = {}
+
+    def register(self, order):
+        assert isinstance(order, Order)
+        if not order.platform in self.registry:
+            self.registry[order.platform] = {}
+        self.registry[order.platform][order.order_id] = order
+
+    @classmethod
+    def to_yaml(cls, dumper, self):
+        return dumper.represent_mapping(cls.yaml_tag, self.registry)
+
+    @classmethod
+    def from_yaml(cls, loader, node):
+        self = cls()
+        self.registry = loader.construct_mapping(node)
+        return self
+
+    def __eq__(self, other):
+        return type(self) == type(other) and vars(self) == vars(other)
+
+    def __ne__(self, other):
+        return not self == other

+ 37 - 0
dingguo/parser/__init__.py

@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+import email
+import traceback
+
+import amazon
+import banggood
+import ingdiba
+import lieferservice
+import mytaxi
+import oebb
+import uber
+import yipbee
+
+order_confirmation_parsers = [
+    amazon.parse_order_confirmation_mail,
+    banggood.parse_order_confirmation_mail,
+    lieferservice.parse_order_confirmation_mail,
+    mytaxi.parse_order_confirmation_mail,
+    oebb.parse_order_confirmation_mail,
+    uber.parse_order_confirmation_mail,
+    yipbee.parse_order_confirmation_mail,
+    ]
+
+def parse_order_confirmation_mail(mail):
+
+    assert isinstance(mail, email.message.Message)
+
+    tracebacks = {}
+
+    for parser in order_confirmation_parsers:
+        try:
+            return parser(mail = mail)
+        except:
+            tracebacks['%s.%s' % (parser.__module__, parser.__name__)] = traceback.format_exc()
+
+    raise Exception('\n'.join(['%s:\n%s' % (f, t) for (f, t) in tracebacks.items()]))

+ 67 - 0
dingguo/parser/amazon.py

@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+
+import datetime
+import dingguo
+import email
+import ioex
+import re
+
+def parse_order_confirmation_mail(mail):
+
+    assert isinstance(mail, email.message.Message)
+
+    msg_text = mail.get_payload()[0].get_payload(decode = True).decode('utf-8')
+
+    if not u'Amazon.de Bestellbestätigung' in msg_text:
+        raise Exception('no amazon order confirmation')
+
+    orders = []
+
+    for order_text in re.split(ur'={32,}', msg_text)[1:-1]:
+
+        order_id = re.search(r'Bestellnummer #(.+)', order_text).group(1)
+
+        order_date_formatted = re.search(ur'Aufgegeben am (.+)', order_text, re.UNICODE).group(1)
+        with ioex.setlocale('de_DE.UTF-8'):
+            order_date = datetime.datetime.strptime(
+                    order_date_formatted.encode('utf-8'),
+                    '%d. %B %Y',
+                    )
+
+        order = dingguo.Order(
+            u'amazon.de',
+            order_id,
+            order_date
+            )
+
+        articles_text = order_text.split('Bestellte(r) Artikel:')[1].split('_' * 10)[0].strip()
+        for article_text in re.split(ur'\n\t*\n', articles_text):
+            article_match = re.match(
+                ur' *((?P<quantity>\d+) x )?(?P<name>.*)\n'
+                    + ur'( *von (?P<authors>.*)\n)?'
+                    + ur' *(?P<price_brutto_currency>[A-Z]+) (?P<price_brutto>\d+,\d+)\n'
+                    + ur'( *Zustand: (?P<state>.*)\n)?'
+                    + ur' *Verkauft von: (?P<reseller>.*)'
+                    + ur'(\n *Versand durch (?P<shipper>.*))?',
+                article_text,
+                re.MULTILINE | re.UNICODE
+                )
+            assert article_match is not None, repr(article_text)
+            article = article_match.groupdict()
+            order.items.append(dingguo.Article(
+                name = article['name'],
+                price_brutto = dingguo.Sum(
+                    float(article['price_brutto'].replace(',', '.')),
+                    article['price_brutto_currency']
+                    ),
+                quantity = int(article['quantity']) if article['quantity'] else 1,
+                authors = article['authors'].split(',') if article['authors'] else None,
+                state = article['state'],
+                reseller = article['reseller'],
+                shipper = article['shipper'],
+                ))
+
+        orders.append(order)
+
+    return orders
+

+ 89 - 0
dingguo/parser/lieferservice.py

@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+
+import datetime
+import dingguo
+import email
+import email.utils
+import ioex
+import re
+
+def parse_order_confirmation_mail(mail):
+
+    assert isinstance(mail, email.message.Message)
+
+    text = mail.get_payload()[0].get_payload(decode = True).decode('utf-8').replace('\r\n', '\n')
+    assert type(text) is unicode
+
+    if not 'Lieferservice.at' in text:
+        raise Exception('no lieferservice.at confirmation')
+    msg_text = mail.get_payload()[0].get_payload(decode = True).decode('utf-8')
+    
+    order_match = re.search(
+            ur'(Your order|Ihre Bestellung) \(.+\) (at|bei) (?P<restaurant>.*)\s+'
+                + ur'(Your order reference is|Ihre Bestellnummer lautet): (?P<order_id>.*)\s+'
+                + ur'[\W\w]+'
+                + ur'(Your order|Ihre Bestellung)\s+'
+                + ur'(?P<orders_text>[\W\w]+)'
+                + ur'(Delivery costs|Lieferung):\s+(?P<delivery_costs>.*)\s+',
+            text,
+            re.UNICODE,
+            )
+    order_match_groups = order_match.groupdict()
+
+    import time
+    order_date = datetime.datetime.fromtimestamp(
+            time.mktime(email.utils.parsedate(mail['Date']))
+            )
+
+    order = dingguo.Order(
+        u'lieferservice.at',
+        order_match_groups['order_id'].strip(),
+        order_date
+        )
+
+    restaurant = order_match_groups['restaurant'].strip('"')
+
+    for article_match in re.finditer(
+            ur'(?P<quantity>\d+)x\s'
+                + ur'(?P<name>.*)\s'
+                + ur'(?P<currency>.) (?P<price>-?\d+,\d+)\s',
+            order_match_groups['orders_text'],
+            re.UNICODE,
+            ):
+        article_match_groups = article_match.groupdict()
+        quantity = int(article_match_groups['quantity'])
+        assert quantity == 1
+        name = re.sub(ur' +', ' ', article_match_groups['name'])
+        price = dingguo.Sum(
+            float(article_match_groups['price'].replace(',', '.')),
+            article_match_groups['currency'],
+            )
+        if price.value < 0:
+            price.value *= -1
+            order.discounts.append(dingguo.Discount(
+                name = name,
+                amount = price,
+                ))
+        else:
+            order.items.append(dingguo.Article(
+                name = name,
+                quantity = 1,
+                price_brutto = price,
+                reseller = restaurant,
+                shipper = restaurant,
+                ))
+
+    delivery_costs = order_match_groups['delivery_costs'].strip()
+    if delivery_costs in ['FREE', 'GRATIS']:
+        order.items.append(dingguo.Item(
+            name = u'Delivery',
+            price_brutto = dingguo.Sum(0.0, u'EUR'),
+            ))
+    else:
+        unit, value = delivery_costs.split(' ')
+        order.items.append(dingguo.Item(
+            name = u'Delivery',
+            price_brutto = dingguo.Sum(float(value.replace(',', '.')), unit),
+            ))
+
+    return [order]

+ 79 - 0
dingguo/parser/mytaxi.py

@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+
+import datetime
+import dingguo
+import email
+import ioex
+import re
+import subprocess
+
+def parse_order_confirmation_mail(mail):
+
+    assert isinstance(mail, email.message.Message)
+
+    if not 'mytaxi' in mail.get_payload()[0].get_payload()[0].get_payload(decode = True):
+        raise Exception('no mytaxi mail')
+
+    pdf_compressed = mail.get_payload()[1].get_payload(decode = True)
+    pdftk = subprocess.Popen(
+            ['pdftk - output - uncompress'],
+            shell = True,
+            stdin = subprocess.PIPE,
+            stdout = subprocess.PIPE,
+            )
+    pdf_uncompressed = pdftk.communicate(
+        input = pdf_compressed,
+        )[0].decode('latin-1')
+    assert type(pdf_uncompressed) is unicode
+
+    order_match = re.search(
+        ur'Rechnungsnummer:[^\(]+\((?P<order_id>\w+)\)',
+        pdf_uncompressed,
+        re.MULTILINE | re.UNICODE
+        )
+    order_id = order_match.groupdict()['order_id']
+
+    ride_match_groups = re.search(
+        ur'\(Bruttobetrag\)'
+            + ur'[^\(]+'
+            + ur'\((?P<price_brutto>\d+,\d+) (?P<price_brutto_currency>.+)\)'
+            + ur'[\w\W]+'
+            + ur'\((?P<driver>[^\(]+)\)'
+            + ur'[^\(]+'
+            + ur'\(\d+,\d+ .\)'
+            + ur'[^\(]+'
+            + ur'\((?P<name>Taxifahrt)'
+            + ur'[^\(]+'
+            + ur'\(von: (?P<departure_point>[^\)]+)'
+            + ur'[^\(]+'
+            + ur'\(nach: (?P<destination_point>[^\)]+)'
+            + ur'[\w\W]+'
+            + ur'Belegdatum \\\(Leistungszeitpunkt\\\):[^\(]+\((?P<arrival_time>\d\d.\d\d.\d\d \d\d:\d\d)\)',
+        pdf_uncompressed,
+        re.MULTILINE | re.UNICODE
+        ).groupdict()
+
+    arrival_time = datetime.datetime.strptime(
+        ride_match_groups['arrival_time'],
+        '%d.%m.%y %H:%M'
+        )
+
+    order = dingguo.Order(
+        u'mytaxi',
+        order_id,
+        arrival_time,
+        )
+    with ioex.setlocale('en_US.UTF-8'):
+        order.items.append(dingguo.TaxiRide(
+            price_brutto = dingguo.Sum(
+                float(ride_match_groups['price_brutto'].replace(',', '.')),
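+                # 0x80 is presumably the euro sign in the PDF's WinAnsi (cp1252) text encoding; the latin-1 decode above leaves it as U+0080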
+                u'EUR' if (ride_match_groups['price_brutto_currency'] == u'\x80')
+                    else ride_match_groups['price_brutto_currency'],
+                ),
+            departure_point = ride_match_groups['departure_point'],
+            destination_point = ride_match_groups['destination_point'],
+            driver = ride_match_groups['driver'],
+            arrival_time = arrival_time,
+            ))
+    return [order]

+ 65 - 0
dingguo/parser/oebb.py

@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+
+import datetime
+import dingguo
+import email
+import ioex
+import re
+
+def parse_order_confirmation_mail(mail):
+
+    assert isinstance(mail, email.message.Message)
+
+    msg = mail.get_payload()[0]
+    if type(msg.get_payload()) is list:
+        msg = msg.get_payload()[0]
+    msg_text = msg.get_payload(decode = True).decode('utf8')
+
+    # msg_text = re.sub(
+    #     r'<[^>]+>',
+    #     '',
+    #     HTMLParser.HTMLParser().unescape(msg.get_payload(decode = True).decode('utf8'))
+    #     )
+
+    order_match = re.search(
+        ur'Booking code:\s+(?P<order_id>[\d ]+)\s+'
+            + ur'Customer number:\s+(?P<customer_id>PV\d+)\s+'
+            + ur'Booking date:\s+(?P<order_date>.* \d{4})\s',
+        msg_text,
+        re.MULTILINE | re.UNICODE
+        )
+    order_match_groups = order_match.groupdict()
+
+    with ioex.setlocale('en_US.UTF-8'):
+        order_date = datetime.datetime.strptime(
+            order_match_groups['order_date'],
+            '%b %d, %Y',
+            )
+
+    order = dingguo.Order(
+        u'oebb',
+        order_match_groups['order_id'],
+        order_date,
+        customer_id = order_match_groups['customer_id'],
+        )
+
+    item_match = re.search(
+        ur'(?P<price_brutto_currency>.)(?P<price_brutto>\d+\.\d+)'
+            + ur'[\W\w]+'
+            + ur'Your Booking\s+'
+            + ur'(?P<departure_point>.*)\s+>\s+(?P<destination_point>.*)',
+        msg_text,
+        re.MULTILINE | re.UNICODE
+        )
+    item = item_match.groupdict()
+    order.items.append(dingguo.Transportation(
+        name = u'Train Ticket',
+        price_brutto = dingguo.Sum(
+            float(item['price_brutto']),
+            item['price_brutto_currency'],
+            ),
+        departure_point = item['departure_point'],
+        destination_point = item['destination_point'],
+        ))
+
+    return [order]

+ 76 - 0
dingguo/parser/uber.py

@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+
+import BeautifulSoup
+import datetime
+import dingguo
+import email
+import ioex
+import re
+
+def parse_order_confirmation_mail(mail):
+
+    assert isinstance(mail, email.message.Message)
+
+    html = mail.get_payload()[0].get_payload(decode = True)
+
+    """ document in html2 has the same structure as the one in html.
+        only difference is that hyperlink urls in html2 have been
+        replaced by 'email.uber.com/wf/click?upn=.*' urls.
+        """
+    html2 = mail.get_payload()[1].get_payload()[0].get_payload(decode = True)
+
+    route_map = mail.get_payload()[1].get_payload()[1].get_payload(decode = True)
+
+    doc = BeautifulSoup.BeautifulSoup(
+        html,
+        convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
+        )
+
+    # strptime's %B and %p directives are locale-dependent, so the parsing below runs under en_US.UTF-8
+    with ioex.setlocale('en_US.UTF-8'):
+
+        trip_id = re.search(
+            ur'[\da-f\-]{36}',
+            doc.find(text = 'Visit the trip page').parent['href'],
+            ).group(0)
+        order = dingguo.Order(
+            u'uber',
+            trip_id,
+            datetime.datetime.strptime(
+                doc.find(attrs = {'class': 'date'}).text,
+                '%B %d, %Y',
+                ),
+            )
+
+        departure_time_tag = doc.find(attrs = {'class': 'from time'})
+        departure_time = datetime.datetime.strptime(
+            departure_time_tag.text,
+            '%I:%M%p',
+            ).time()
+
+        arrival_time_tag = doc.find(attrs = {'class': 'to time'})
+        arrival_time = datetime.datetime.strptime(
+            arrival_time_tag.text,
+            '%I:%M%p',
+            ).time()
+
+    distance = dingguo.Distance(
+        float(doc.find(text = 'kilometers').parent.parent.find(attrs = {'class': 'data'}).text),
+        u'km',
+        )
+
+    fare = doc.find(attrs = {'class': 'header-price'}).find(attrs = {'class': 'header-fare text-pad'}).text
+
+    order.items.append(dingguo.TaxiRide(
+        name = doc.find(text = 'CAR').parent.parent.find(attrs = {'class': 'data'}).text + ' Ride',
+        price_brutto = dingguo.Sum(float(fare[1:]), fare[0]),
+        arrival_time = datetime.datetime.combine(order.order_date, arrival_time),
+        departure_time = datetime.datetime.combine(order.order_date, departure_time),
+        departure_point = departure_time_tag.parent.find(attrs = {'class': 'address'}).text,
+        destination_point = arrival_time_tag.parent.find(attrs = {'class': 'address'}).text,
+        distance = distance,
+        driver = doc.find(attrs = {'class': 'driver-info'}).text[len('You rode with '):],
+        route_map = route_map,
+        ))
+
+    return [order]

+ 76 - 0
dingguo/parser/yipbee.py

@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+
+import BeautifulSoup
+import datetime
+import dingguo
+import email
+import ioex
+import re
+
+def parse_order_confirmation_mail(mail):
+
+    assert isinstance(mail, email.message.Message)
+
+    text = mail.get_payload()[0].get_payload()[0].get_payload(decode = True).decode('utf-8')
+    if not u'Vielen Dank für deine Bestellung bei yipbee' in text:
+        raise Exception('no yipbee confirmation')
+
+    order_match_groups = re.search(
+        ur'[\W\w]+'
+            + ur'BESTELLUNG: (?P<order_id>\w+) vom (?P<order_time>\d\d.\d\d.\d{4} \d\d:\d\d:\d\d)'
+            + ur'[\W\w]+'
+            + ur'GESAMTPREIS\s+'
+            + ur'(?P<articles_and_discount_text>[\W\w]+)'
+            + ur'(?P<summary_text>ARTIKEL [\W\w]+)',
+        text,
+        re.UNICODE
+        ).groupdict()
+
+    order = dingguo.Order(
+        u'yipbee',
+        order_match_groups['order_id'],
+        datetime.datetime.strptime(order_match_groups['order_time'], '%d.%m.%Y %H:%M:%S'),
+        )
+
+    for article_match in re.finditer(
+            ur'(?P<name>[\w\-\.\:,%\(\) ]+ (Klasse \d|[\w\-\. ]+[^\d ]))'
+                + ur'(?P<total_price>\d+,\d\d) €(?P<quantity>\d)(?P<total_price_2>\d+,\d\d) €',
+            order_match_groups['articles_and_discount_text'].replace('\n', ' '),
+            re.UNICODE,
+            ):
+        article_match_groups = article_match.groupdict()
+        total_price = float(article_match_groups['total_price'].replace(',', '.'))
+        total_price_2 = float(article_match_groups['total_price_2'].replace(',', '.'))
+        assert abs(total_price - total_price_2) < 0.01, 'expected %f, received %f' % (total_price, total_price_2)
+        quantity = int(article_match_groups['quantity'])
+        order.items.append(dingguo.Article(
+            name = article_match_groups['name'],
+            price_brutto = dingguo.Sum(round(total_price / quantity, 2), u'EUR'),
+            quantity = quantity,
+            reseller = u'yipbee',
+            shipper = u'yipbee',
+            ))
+
+    articles_price = float(text.split('RABATTE')[0].split('ARTIKEL')[-1].strip().split(' ')[0].replace(',', '.'))
+    assert abs(articles_price - sum([a.price_brutto.value * a.quantity for a in order.items])) < 0.01
+
+    discount_tag = BeautifulSoup.BeautifulSoup(
+        order_match_groups['articles_and_discount_text'],
+        convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
+        ).find('tr')
+    if discount_tag:
+        name_tag, value_tag = discount_tag.findAll('td', recursive = False)
+        value, currency = value_tag.text.split(' ')
+        order.discounts.append(dingguo.Discount(
+            name = name_tag.text,
+            amount = dingguo.Sum(float(value.replace(',', '.')) * -1, currency),
+            ))
+
+    delivery_price = order_match_groups['summary_text'].split('VERSAND')[1].split('STEUERN')[0].strip()
+    delivery_price_value, delivery_price_currency = delivery_price.split(' ')
+    order.items.append(dingguo.Item(
+        name = u'Delivery',
+        price_brutto = dingguo.Sum(float(delivery_price_value.replace(',', '.')), delivery_price_currency),
+        ))
+
+    return [order]

+ 22 - 519
scripts/order-confirmation-mail-parser

@@ -2,513 +2,12 @@
 # -*- coding: utf-8 -*-
 # PYTHON_ARGCOMPLETE_OK
 
-import dingguo
-import dingguo.parser.banggood
-
-import re
+import dingguo.parser
+import email
 import os
+import shutil
 import sys
 import yaml
-import email
-import shutil
-import pprint
-import random
-import locale
-import argparse
-import datetime
-import traceback
-import subprocess
-import HTMLParser
-import argcomplete
-import BeautifulSoup
-
-def parse_amazon(msg):
-
-    msg_text = msg.get_payload()[0].get_payload(decode = True).decode('utf-8')
-
-    if not u'Amazon.de Bestellbestätigung' in msg_text:
-        raise Exception('no amazon order confirmation')
-
-    orders = []
-
-    for order_text in re.split(ur'={32,}', msg_text)[1:-1]:
-
-        order_id = re.search(r'Bestellnummer #(.+)', order_text).group(1)
-
-        order_date_formatted = re.search(ur'Aufgegeben am (.+)', order_text, re.UNICODE).group(1)
-        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
-        order_date = datetime.datetime.strptime(order_date_formatted.encode('utf-8'), '%d. %B %Y')
-
-        order = dingguo.Order(
-            u'amazon.de',
-            order_id,
-            order_date
-            )
-
-        articles_text = order_text.split('Bestellte(r) Artikel:')[1].split('_' * 10)[0].strip()
-        for article_text in re.split(ur'\n\t*\n', articles_text):
-            article_match = re.match(
-                ur' *((?P<quantity>\d+) x )?(?P<name>.*)\n'
-                    + ur'( *von (?P<authors>.*)\n)?'
-                    + ur' *(?P<price_brutto_currency>[A-Z]+) (?P<price_brutto>\d+,\d+)\n'
-                    + ur'( *Zustand: (?P<state>.*)\n)?'
-                    + ur' *Verkauft von: (?P<reseller>.*)'
-                    + ur'(\n *Versand durch (?P<shipper>.*))?',
-                article_text,
-                re.MULTILINE | re.UNICODE
-                )
-            if article_match is None:
-                sys.stderr.write(repr(article_text) + '\n')
-                raise Exception('could not match article')
-            article = article_match.groupdict()
-            order.items.append(dingguo.Article(
-                name = article['name'],
-                price_brutto = dingguo.Sum(
-                    float(article['price_brutto'].replace(',', '.')),
-                    article['price_brutto_currency']
-                    ),
-                quantity = int(article['quantity']) if article['quantity'] else 1,
-                authors = article['authors'].split(',') if article['authors'] else [],
-                state = article['state'],
-                reseller = article['reseller'],
-                shipper = article['shipper'],
-                ))
-
-        orders.append(order)
-
-    return orders
-
-def parse_oebb(msg):
-
-    msg = msg.get_payload()[0]
-    if type(msg.get_payload()) is list:
-        msg = msg.get_payload()[0]
-    msg_text = msg.get_payload(decode = True).decode('utf8')
-
-    # msg_text = re.sub(
-    #     r'<[^>]+>',
-    #     '',
-    #     HTMLParser.HTMLParser().unescape(msg.get_payload(decode = True).decode('utf8'))
-    #     )
-
-    order_match = re.search(
-        ur'Booking code:\s+(?P<order_id>[\d ]+)\s+'
-            + ur'Customer number:\s+(?P<customer_id>PV\d+)\s+'
-            + ur'Booking date:\s+(?P<order_date>.* \d{4})\s',
-        msg_text,
-        re.MULTILINE | re.UNICODE
-        )
-    order_match_groups = order_match.groupdict()
-
-    locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
-    order_date = datetime.datetime.strptime(
-        order_match_groups['order_date'],
-        '%b %d, %Y'
-        )
-
-    order = dingguo.Order(
-        u'oebb',
-        order_match_groups['order_id'],
-        order_date,
-        customer_id = order_match_groups['customer_id'],
-        )
-
-    item_match = re.search(
-        ur'(?P<price_brutto_currency>.)(?P<price_brutto>\d+\.\d+)'
-            + ur'[\W\w]+'
-            + ur'Your Booking\s+'
-            + ur'(?P<departure_point>.*)\s+>\s+(?P<destination_point>.*)',
-        msg_text,
-        re.MULTILINE | re.UNICODE
-        )
-    item = item_match.groupdict()
-    order.items.append(dingguo.Transportation(
-        name = u'Train Ticket',
-        price_brutto = dingguo.Sum(
-            float(item['price_brutto']),
-            item['price_brutto_currency'],
-            ),
-        departure_point = item['departure_point'],
-        destination_point = item['destination_point'],
-        ))
-
-    return [order]
-
-def parse_mytaxi(msg):
-
-    if not 'mytaxi' in msg.get_payload()[0].get_payload()[0].get_payload(decode = True):
-        raise Exception('no mytaxi mail')
-
-    pdf_compressed = msg.get_payload()[1].get_payload(decode = True)
-    pdftk = subprocess.Popen(
-            ['pdftk - output - uncompress'],
-            shell = True,
-            stdin = subprocess.PIPE,
-            stdout = subprocess.PIPE,
-            )
-    pdf_uncompressed = pdftk.communicate(
-        input = pdf_compressed,
-        )[0].decode('latin-1')
-    assert type(pdf_uncompressed) is unicode
-
-    order_match = re.search(
-        ur'Rechnungsnummer:[^\(]+\((?P<order_id>\w+)\)',
-        pdf_uncompressed,
-        re.MULTILINE | re.UNICODE
-        )
-    order_id = order_match.groupdict()['order_id']
-
-    ride_match_groups = re.search(
-        ur'\(Bruttobetrag\)'
-            + ur'[^\(]+'
-            + ur'\((?P<price_brutto>\d+,\d+) (?P<price_brutto_currency>.+)\)'
-            + ur'[\w\W]+'
-            + ur'\((?P<driver>[^\(]+)\)'
-            + ur'[^\(]+'
-            + ur'\(\d+,\d+ .\)'
-            + ur'[^\(]+'
-            + ur'\((?P<name>Taxifahrt)'
-            + ur'[^\(]+'
-            + ur'\(von: (?P<departure_point>[^\)]+)'
-            + ur'[^\(]+'
-            + ur'\(nach: (?P<destination_point>[^\)]+)'
-            + ur'[\w\W]+'
-            + ur'Belegdatum \\\(Leistungszeitpunkt\\\):[^\(]+\((?P<arrival_time>\d\d.\d\d.\d\d \d\d:\d\d)\)',
-        pdf_uncompressed,
-        re.MULTILINE | re.UNICODE
-        ).groupdict()
-
-    arrival_time = datetime.datetime.strptime(
-        ride_match_groups['arrival_time'],
-        '%d.%m.%y %H:%M'
-        )
-
-    order = dingguo.Order(
-        u'mytaxi',
-        order_id,
-        arrival_time,
-        )
-    locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
-    order.items.append(dingguo.TaxiRide(
-        price_brutto = dingguo.Sum(
-            float(ride_match_groups['price_brutto'].replace(',', '.')),
-            # why 0x80 ?
-            u'EUR' if (ride_match_groups['price_brutto_currency'] == u'\x80')
-                else ride_match_groups['price_brutto_currency'],
-            ),
-        departure_point = ride_match_groups['departure_point'],
-        destination_point = ride_match_groups['destination_point'],
-        driver = ride_match_groups['driver'],
-        arrival_time = arrival_time,
-        ))
-    return [order]
-
-def parse_uber(msg):
-
-    html = msg.get_payload()[0].get_payload(decode = True)
-
-    """ document in html2 has the same structure as the one in html.
-        only difference is that hyperlink urls in html2 have been
-        replaced by 'email.uber.com/wf/click?upn=.*' urls.
-        """
-    html2 = msg.get_payload()[1].get_payload()[0].get_payload(decode = True)
-
-    route_map = msg.get_payload()[1].get_payload()[1].get_payload(decode = True)
-
-    doc = BeautifulSoup.BeautifulSoup(
-        html,
-        convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
-        )
-
-    # strptime
-    locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
-
-    trip_id = re.search(
-        ur'[\da-f\-]{36}',
-        doc.find(text = 'Visit the trip page').parent['href'],
-        ).group(0)
-    order = dingguo.Order(
-        u'uber',
-        trip_id,
-        datetime.datetime.strptime(
-            doc.find(attrs = {'class': 'date'}).text,
-            '%B %d, %Y',
-            ),
-        )
-
-    departure_time_tag = doc.find(attrs = {'class': 'from time'})
-    departure_time = datetime.datetime.strptime(
-        departure_time_tag.text,
-        '%I:%M%p',
-        ).time()
-
-    arrival_time_tag = doc.find(attrs = {'class': 'to time'})
-    arrival_time = datetime.datetime.strptime(
-        arrival_time_tag.text,
-        '%I:%M%p',
-        ).time()
-
-    distance = dingguo.Distance(
-        float(doc.find(text = 'kilometers').parent.parent.find(attrs = {'class': 'data'}).text),
-        u'km',
-        )
-
-    fare = doc.find(attrs = {'class': 'header-price'}).find(attrs = {'class': 'header-fare text-pad'}).text
-
-    order.items.append(dingguo.TaxiRide(
-        name = doc.find(text = 'CAR').parent.parent.find(attrs = {'class': 'data'}).text + ' Ride',
-        price_brutto = dingguo.Sum(float(fare[1:]), fare[0]),
-        arrival_time = datetime.datetime.combine(order.order_date, arrival_time),
-        departure_time = datetime.datetime.combine(order.order_date, departure_time),
-        departure_point = departure_time_tag.parent.find(attrs = {'class': 'address'}).text,
-        destination_point = arrival_time_tag.parent.find(attrs = {'class': 'address'}).text,
-        distance = distance,
-        driver = doc.find(attrs = {'class': 'driver-info'}).text[len('You rode with '):],
-        route_map = route_map,
-        ))
-
-    return [order]
-
-def parse_yipbee(msg):
-
-    text = msg.get_payload()[0].get_payload()[0].get_payload(decode = True).decode('utf-8')
-    if not u'Vielen Dank für deine Bestellung bei yipbee' in text:
-        raise Exception('no yipbee confirmation')
-
-    order_match_groups = re.search(
-        ur'[\W\w]+'
-            + ur'BESTELLUNG: (?P<order_id>\w+) vom (?P<order_time>\d\d.\d\d.\d{4} \d\d:\d\d:\d\d)'
-            + ur'[\W\w]+'
-            + ur'GESAMTPREIS\s+'
-            + ur'(?P<articles_and_discount_text>[\W\w]+)'
-            + ur'(?P<summary_text>ARTIKEL [\W\w]+)',
-        text,
-        re.UNICODE
-        ).groupdict()
-
-    order = dingguo.Order(
-        u'yipbee',
-        order_match_groups['order_id'],
-        datetime.datetime.strptime(order_match_groups['order_time'], '%d.%m.%Y %H:%M:%S'),
-        )
-
-    for article_match in re.finditer(
-            ur'(?P<name>[\w\-\.\:,%\(\) ]+ (Klasse \d|[\w\-\. ]+[^\d ]))'
-                + ur'(?P<total_price>\d+,\d\d) €(?P<quantity>\d)(?P<total_price_2>\d+,\d\d) €',
-            order_match_groups['articles_and_discount_text'].replace('\n', ' '),
-            re.UNICODE,
-            ):
-        article_match_groups = article_match.groupdict()
-        total_price = float(article_match_groups['total_price'].replace(',', '.'))
-        total_price_2 = float(article_match_groups['total_price_2'].replace(',', '.'))
-        assert abs(total_price - total_price_2) < 0.01, 'expected %f, received %f' % (total_price, total_price_2)
-        quantity = int(article_match_groups['quantity'])
-        order.items.append(dingguo.Article(
-            name = article_match_groups['name'],
-            price_brutto = dingguo.Sum(round(total_price / quantity, 2), u'EUR'),
-            quantity = quantity,
-            reseller = u'yipbee',
-            shipper = u'yipbee',
-            ))
-
-    articles_price = float(text.split('RABATTE')[0].split('ARTIKEL')[-1].strip().split(' ')[0].replace(',', '.'))
-    assert abs(articles_price - sum([a.price_brutto.value * a.quantity for a in order.items])) < 0.01
-
-    discount_tag = BeautifulSoup.BeautifulSoup(
-        order_match_groups['articles_and_discount_text'],
-        convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
-        ).find('tr')
-    if discount_tag:
-        name_tag, value_tag = discount_tag.findAll('td', recursive = False)
-        value, currency = value_tag.text.split(' ')
-        order.discounts.append(dingguo.Discount(
-            name = name_tag.text,
-            amount = dingguo.Sum(float(value.replace(',', '.')) * -1, currency),
-            ))
-
-    delivery_price = order_match_groups['summary_text'].split('VERSAND')[1].split('STEUERN')[0].strip()
-    delivery_price_value, delivery_price_currency = delivery_price.split(' ')
-    order.items.append(dingguo.Item(
-        name = u'Delivery',
-        price_brutto = dingguo.Sum(float(delivery_price_value.replace(',', '.')), delivery_price_currency),
-        ))
-
-    return [order]
-
-def parse_yipbee_html(msg):
-
-    html = msg.get_payload()[0].get_payload()[1].get_payload(decode = True)
-
-    if not 'yipbee' in html:
-        raise Exception('no yipbee confirmation')
-
-    doc = BeautifulSoup.BeautifulSoup(html, convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES)
-    content_table = doc.find('table')
-
-    order_match_groups = re.search(
-        ur'Bestellung:(?P<order_id>\w+) vom (?P<order_time>\d\d.\d\d.\d{4} \d\d:\d\d:\d\d)',
-        content_table.find('table').findAll('tr')[3].text,
-        re.UNICODE
-        ).groupdict()
-
-    order = dingguo.Order(
-        u'yipbee',
-        order_match_groups['order_id'],
-        datetime.datetime.strptime(order_match_groups['order_time'], '%d.%m.%Y %H:%M:%S'),
-        )
-
-    articles_table = content_table.find('table').find('tbody').findAll('tr', recursive = False)[4].find('table')
-    for article_row in articles_table.find('tbody').findAll('tr', recursive = False)[1:]:
-        article_columns = article_row.findAll('td', recursive = False)
-        (price, currency) = re.sub(ur'\s+', ' ', article_columns[2].text.replace(u',', u'.')).split(' ')
-        order.items.append(dingguo.Article(
-            name = article_columns[1].text,
-            price_brutto = dingguo.Sum(float(price), currency),
-            quantity = int(article_columns[3].text),
-            reseller = u'yipbee',
-            shipper = u'yipbee',
-            ))
-
-    discount_row = content_table.find('table').find('tbody').findAll('tr', recursive = False)[6]
-    (discount_name, discount_value_with_currency) = [c.text for c in discount_row.findAll('td', recursive = False)]
-    (discount_value, discount_currency) = discount_value_with_currency.split(' ')
-    order.discounts.append(dingguo.Discount(
-        name = discount_name,
-        amount = dingguo.Sum(float(discount_value.replace(',', '.')) * -1, discount_currency)
-        ))
-
-    shipping_costs_table = content_table.find('tbody').findAll('tr', recursive = False)[3].findAll('table')[1]
-    (shipping_price, shipping_currency) = shipping_costs_table.text.replace(',', '.').split(' ')
-    order.items.append(dingguo.Item(
-        name = u'Delivery',
-        price_brutto = dingguo.Sum(float(shipping_price), shipping_currency),
-        ))
-
-    return [order]
-
-def parse_lieferservice(msg):
-
-    text = msg.get_payload()[0].get_payload(decode = True).decode('utf-8').replace('\r\n', '\n')
-    assert type(text) is unicode
-
-    if not 'Lieferservice.at' in text:
-        raise Exception('no lieferservice.at confirmation')
-
-    order_match = re.search(
-            ur'(Your order|Ihre Bestellung) \(.+\) (at|bei) (?P<restaurant>.*)\s+'
-                + ur'(Your order reference is|Ihre Bestellnummer lautet): (?P<order_id>.*)\s+'
-                + ur'[\W\w]+'
-                + ur'(Your order|Ihre Bestellung)\s+'
-                + ur'(?P<orders_text>[\W\w]+)'
-                + ur'(Delivery costs|Lieferung):\s+(?P<delivery_costs>.*)\s+',
-            text,
-            re.UNICODE,
-            )
-
-    order_match_groups = order_match.groupdict()
-
-    import time
-    import email.utils
-    order_date = datetime.datetime.fromtimestamp(
-            time.mktime(email.utils.parsedate(msg['Date']))
-            )
-
-    order = dingguo.Order(
-        u'lieferservice.at',
-        order_match_groups['order_id'].strip(),
-        order_date
-        )
-
-    restaurant = order_match_groups['restaurant'].strip('"')
-
-    for article_match in re.finditer(
-            ur'(?P<quantity>\d+)x\s'
-                + ur'(?P<name>.*)\s'
-                + ur'(?P<currency>.) (?P<price>-?\d+,\d+)\s',
-            order_match_groups['orders_text'],
-            re.UNICODE,
-            ):
-        article_match_groups = article_match.groupdict()
-        quantity = int(article_match_groups['quantity'])
-        assert quantity == 1
-        name = re.sub(ur' +', ' ', article_match_groups['name'])
-        price = dingguo.Sum(
-            float(article_match_groups['price'].replace(',', '.')),
-            article_match_groups['currency'],
-            )
-        if price.value < 0:
-            price.value *= -1
-            order.discounts.append(dingguo.Discount(
-                name = name,
-                amount = price,
-                ))
-        else:
-            order.items.append(dingguo.Article(
-                name = name,
-                quantity = 1,
-                price_brutto = price,
-                reseller = restaurant,
-                shipper = restaurant,
-                ))
-
-    delivery_costs = order_match_groups['delivery_costs'].strip()
-    if delivery_costs in ['FREE', 'GRATIS']:
-        order.items.append(dingguo.Item(
-            name = u'Delivery',
-            price_brutto = dingguo.Sum(0.0, u'EUR'),
-            ))
-    else:
-        unit, value = delivery_costs.split(' ')
-        order.items.append(dingguo.Item(
-            name = u'Delivery',
-            price_brutto = dingguo.Sum(float(value.replace(',', '.')), unit),
-            ))
-
-    return [order]
-
-def parse(msg):
-
-    tracebacks = {}
-
-    try:
-        return parse_amazon(msg)
-    except:
-        tracebacks['amazon'] = traceback.format_exc()
-
-    try:
-        return dingguo.parser.banggood.parse_order_confirmation_mail(msg)
-    except:
-        tracebacks['banggood'] = traceback.format_exc()
-
-    try:
-        return parse_oebb(msg)
-    except:
-        tracebacks['oebb'] = traceback.format_exc()
-
-    try:
-        return parse_lieferservice(msg)
-    except:
-        tracebacks['lieferservice'] = traceback.format_exc()
-
-    try:
-        return parse_mytaxi(msg)
-    except:
-        tracebacks['mytaxi'] = traceback.format_exc()
-
-    try:
-        return parse_uber(msg)
-    except:
-        tracebacks['uber'] = traceback.format_exc()
-
-    try:
-        return parse_yipbee(msg)
-    except:
-        tracebacks['yipbee'] = traceback.format_exc()
-
-    for parser_name in tracebacks:
-        sys.stderr.write('%s parser: \n%s\n' % (parser_name, tracebacks[parser_name]))
-
-    raise Exception('failed to parse')
 
 def compute(mail_path, catalogue, register_path):
 
@@ -517,7 +16,9 @@ def compute(mail_path, catalogue, register_path):
     if mail_path:
         for p in mail_path:
             with open(p, 'r') as mail:
-                mail_orders = parse(email.message_from_file(mail))
+                mail_orders = dingguo.parser.parse_order_confirmation_mail(
+                        email.message_from_file(mail)
+                        )
                 orders += mail_orders
             if catalogue:
                 for order in mail_orders:
@@ -528,26 +29,24 @@ def compute(mail_path, catalogue, register_path):
                 os.remove(p)
     else:
         msg = email.message_from_string(sys.stdin.read())
-        orders += parse(msg)
+        orders += dingguo.parser.parse_order_confirmation_mail(msg)
 
     if register_path:
-        with open(register_path, 'r') as register:
-            registered_orders = yaml.load(register.read().decode('utf-8'))
-        if not registered_orders:
-            registered_orders = {}
+        if os.path.exists(register_path):
+            with open(register_path, 'r') as register_file:
+                registry = yaml.load(register_file.read().decode('utf-8'))
+        else:
+            registry = dingguo.OrderRegistry()
         for order in orders:
-            if order.platform not in registered_orders:
-                registered_orders[order.platform] = {}
-            if order.order_id in registered_orders[order.platform]:
-                raise Exception('already registered')
-            registered_orders[order.platform][order.order_id] = order
+            registry.register(order)
         with open(register_path, 'w') as register:
-            register.write(yaml.safe_dump(registered_orders, default_flow_style = False))
-    else:
-        print(yaml.safe_dump(orders, default_flow_style = False))
+            register.write(yaml.dump(registry, default_flow_style = False, allow_unicode = True))
+        
+    print(yaml.dump(orders, default_flow_style = False, allow_unicode = True))
 
 def _init_argparser():
 
+    import argparse
     argparser = argparse.ArgumentParser(description = None)
     argparser.add_argument('--register', metavar = 'path', dest = 'register_path')
     argparser.add_argument('--catalogue', action='store_true')
@@ -557,7 +56,11 @@ def _init_argparser():
 def main(argv):
 
     argparser = _init_argparser()
-    argcomplete.autocomplete(argparser)
+    try:
+        import argcomplete
+        argcomplete.autocomplete(argparser)
+    except ImportError:
+        pass
     args = argparser.parse_args(argv)
 
     compute(**vars(args))

+ 0 - 52
tests/test_integration.py

@@ -1,52 +0,0 @@
-import pytest
-
-import os
-import glob
-import yaml
-import subprocess
-
-project_root_path = os.path.realpath(os.path.join(__file__, '..', '..'))
-script_path = os.path.join(project_root_path, 'scripts', 'order-confirmation-mail-parser')
-test_data_path = os.path.join(project_root_path, 'tests', 'data')
-
-def test_integration_stdin():
-    for mail_path in glob.glob(os.path.join(test_data_path, '*', 'mail_*.eml')):
-        with open(mail_path, 'r') as mail_file:
-            process = subprocess.Popen([script_path], stdin = subprocess.PIPE, stdout = subprocess.PIPE)
-            script_stdout, script_stderr = process.communicate(input = mail_file.read())
-            result = yaml.load(script_stdout)
-            with open(mail_path.replace('.eml', '.yml'), 'r') as expected_result_file:
-                expected_result = yaml.load(expected_result_file.read())
-                assert result == expected_result, 'unexpected result for ' + os.path.basename(mail_path)
-
-def test_integration_path():
-    result = yaml.load(subprocess.check_output([
-        script_path,
-        os.path.join(test_data_path, 'amazon', 'mail_1.eml'),
-        os.path.join(test_data_path, 'oebb', 'mail_1.eml'),
-        os.path.join(test_data_path, 'oebb', 'mail_2.eml'),
-        ]))
-    expected_result = []
-    for result_path in [
-            os.path.join(test_data_path, 'amazon', 'mail_1.yml'),
-            os.path.join(test_data_path, 'oebb', 'mail_1.yml'),
-            os.path.join(test_data_path, 'oebb', 'mail_2.yml'),
-            ]:
-        with open(result_path) as yaml_file:
-            expected_result += yaml.load(yaml_file.read())
-    assert result == expected_result
-
-def test_catalogue(tmpdir):
-    os.chdir(tmpdir.strpath)
-    import shutil
-    shutil.copyfile(os.path.join(test_data_path, 'amazon', 'mail_8.eml'), 'mail.eml')
-    assert os.listdir('.') == ['mail.eml']
-    orders = yaml.load(subprocess.check_output([
-        script_path, 
-        '--catalogue',
-        'mail.eml',
-        ]))
-    assert os.listdir('.') == ['amazon.de']
-    assert os.listdir('amazon.de') == ['028-6176648-7157123',  '028-0273468-3485109']
-    assert os.listdir(os.path.join('amazon.de', '028-6176648-7157123')) == ['mail.eml']
-    assert os.listdir(os.path.join('amazon.de', '028-0273468-3485109')) == ['mail.eml']

+ 32 - 0
tests/test_parser.py

@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+
+import pytest
+
+import dingguo.parser
+import email
+import os
+
+project_root_path = os.path.realpath(os.path.join(__file__, '..', '..'))
+test_data_path = os.path.join(project_root_path, 'tests', 'data')
+
+@pytest.mark.parametrize('platform,mail_path', [
+    ('amazon.de', os.path.join(test_data_path, 'amazon', 'mail_1.eml')),
+    ('banggood', os.path.join(test_data_path, 'banggood', '1.eml')),
+    ('lieferservice.at', os.path.join(test_data_path, 'lieferservice.at', 'mail_1.eml')),
+    ('mytaxi', os.path.join(test_data_path, 'mytaxi', 'mail_1.eml')),
+    ('oebb', os.path.join(test_data_path, 'oebb', 'mail_1.eml')),
+    ('uber', os.path.join(test_data_path, 'uber', 'mail_1.eml')),
+    ('yipbee', os.path.join(test_data_path, 'yipbee', 'mail_1.eml')),
+    ])
+def test_parse_confirmation_mail_platform(platform, mail_path):  # the generic parser must tag every order with the expected platform
+    with open(mail_path) as mail:
+        parsed_orders = dingguo.parser.parse_order_confirmation_mail(
+                email.message_from_file(mail)
+                )
+    assert len(parsed_orders) > 0, 'no orders parsed from ' + mail_path  # an empty result must not pass vacuously
+    assert all(order.platform == platform for order in parsed_orders)
+
+def test_parse_confirmation_mail_failure():  # a message no platform parser recognises must raise
+    unparsable_msg = email.message_from_string('empty mail')
+    with pytest.raises(Exception):
+        dingguo.parser.parse_order_confirmation_mail(unparsable_msg)

+ 24 - 0
tests/test_parser_amazon.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+import pytest
+
+import dingguo.parser.amazon
+import email
+import glob
+import os
+import test_yaml
+import yaml
+
+project_root_path = os.path.realpath(os.path.join(__file__, '..', '..'))
+test_data_path = os.path.join(project_root_path, 'tests', 'data', 'amazon')
+
+@pytest.mark.parametrize('mail_path', sorted(glob.glob(os.path.join(test_data_path, '*.eml'))))
+def test_parse_confirmation_mail(mail_path):  # each amazon sample mail must parse to the orders stored in its sibling .yml
+    with open(mail_path) as mail:
+        parsed_orders = dingguo.parser.amazon.parse_order_confirmation_mail(
+                email.message_from_file(mail)
+                )
+    with open(os.path.splitext(mail_path)[0] + '.yml') as yaml_file:  # swap only the extension, never other '.eml' substrings
+        expected_orders = yaml.load(yaml_file.read())
+    assert expected_orders == parsed_orders, \
+            test_yaml.yaml_diff(expected_orders, parsed_orders)

+ 24 - 0
tests/test_parser_lieferservice.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+import pytest
+
+import dingguo.parser.lieferservice
+import email
+import glob
+import os
+import test_yaml
+import yaml
+
+project_root_path = os.path.realpath(os.path.join(__file__, '..', '..'))
+test_data_path = os.path.join(project_root_path, 'tests', 'data', 'lieferservice.at')
+
+@pytest.mark.parametrize('mail_path', sorted(glob.glob(os.path.join(test_data_path, '*.eml'))))
+def test_parse_confirmation_mail(mail_path):  # each lieferservice.at sample mail must parse to the orders stored in its sibling .yml
+    with open(mail_path) as mail:
+        parsed_orders = dingguo.parser.lieferservice.parse_order_confirmation_mail(
+                email.message_from_file(mail)
+                )
+    with open(os.path.splitext(mail_path)[0] + '.yml') as yaml_file:  # swap only the extension, never other '.eml' substrings
+        expected_orders = yaml.load(yaml_file.read())
+    assert expected_orders == parsed_orders, \
+            test_yaml.yaml_diff(expected_orders, parsed_orders)

+ 24 - 0
tests/test_parser_mytaxi.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+import pytest
+
+import dingguo.parser.mytaxi
+import email
+import glob
+import os
+import test_yaml
+import yaml
+
+project_root_path = os.path.realpath(os.path.join(__file__, '..', '..'))
+test_data_path = os.path.join(project_root_path, 'tests', 'data', 'mytaxi')
+
+@pytest.mark.parametrize('mail_path', sorted(glob.glob(os.path.join(test_data_path, '*.eml'))))
+def test_parse_confirmation_mail(mail_path):  # each mytaxi sample mail must parse to the orders stored in its sibling .yml
+    with open(mail_path) as mail:
+        parsed_orders = dingguo.parser.mytaxi.parse_order_confirmation_mail(
+                email.message_from_file(mail)
+                )
+    with open(os.path.splitext(mail_path)[0] + '.yml') as yaml_file:  # swap only the extension, never other '.eml' substrings
+        expected_orders = yaml.load(yaml_file.read())
+    assert expected_orders == parsed_orders, \
+            test_yaml.yaml_diff(expected_orders, parsed_orders)

+ 24 - 0
tests/test_parser_oebb.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+import pytest
+
+import dingguo.parser.oebb
+import email
+import glob
+import os
+import test_yaml
+import yaml
+
+project_root_path = os.path.realpath(os.path.join(__file__, '..', '..'))
+test_data_path = os.path.join(project_root_path, 'tests', 'data', 'oebb')
+
+@pytest.mark.parametrize('mail_path', sorted(glob.glob(os.path.join(test_data_path, '*.eml'))))
+def test_parse_confirmation_mail(mail_path):  # each oebb sample mail must parse to the orders stored in its sibling .yml
+    with open(mail_path) as mail:
+        parsed_orders = dingguo.parser.oebb.parse_order_confirmation_mail(
+                email.message_from_file(mail)
+                )
+    with open(os.path.splitext(mail_path)[0] + '.yml') as yaml_file:  # swap only the extension, never other '.eml' substrings
+        expected_orders = yaml.load(yaml_file.read())
+    assert expected_orders == parsed_orders, \
+            test_yaml.yaml_diff(expected_orders, parsed_orders)

+ 24 - 0
tests/test_parser_uber.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+import pytest
+
+import dingguo.parser.uber
+import email
+import glob
+import os
+import test_yaml
+import yaml
+
+project_root_path = os.path.realpath(os.path.join(__file__, '..', '..'))
+test_data_path = os.path.join(project_root_path, 'tests', 'data', 'uber')
+
+@pytest.mark.parametrize('mail_path', sorted(glob.glob(os.path.join(test_data_path, '*.eml'))))
+def test_parse_confirmation_mail(mail_path):  # each uber sample mail must parse to the orders stored in its sibling .yml
+    with open(mail_path) as mail:
+        parsed_orders = dingguo.parser.uber.parse_order_confirmation_mail(
+                email.message_from_file(mail)
+                )
+    with open(os.path.splitext(mail_path)[0] + '.yml') as yaml_file:  # swap only the extension, never other '.eml' substrings
+        expected_orders = yaml.load(yaml_file.read())
+    assert expected_orders == parsed_orders, \
+            test_yaml.yaml_diff(expected_orders, parsed_orders)

+ 24 - 0
tests/test_parser_yipbee.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+import pytest
+
+import dingguo.parser.yipbee
+import email
+import glob
+import os
+import test_yaml
+import yaml
+
+project_root_path = os.path.realpath(os.path.join(__file__, '..', '..'))
+test_data_path = os.path.join(project_root_path, 'tests', 'data', 'yipbee')
+
+@pytest.mark.parametrize('mail_path', sorted(glob.glob(os.path.join(test_data_path, '*.eml'))))
+def test_parse_confirmation_mail(mail_path):  # each yipbee sample mail must parse to the orders stored in its sibling .yml
+    with open(mail_path) as mail:
+        parsed_orders = dingguo.parser.yipbee.parse_order_confirmation_mail(
+                email.message_from_file(mail)
+                )
+    with open(os.path.splitext(mail_path)[0] + '.yml') as yaml_file:  # swap only the extension, never other '.eml' substrings
+        expected_orders = yaml.load(yaml_file.read())
+    assert expected_orders == parsed_orders, \
+            test_yaml.yaml_diff(expected_orders, parsed_orders)

+ 137 - 0
tests/test_script_order_confirmation_mail_parser.py

@@ -0,0 +1,137 @@
+import pytest
+
+import dingguo.parser
+import email
+import os
+import shutil
+import subprocess
+import yaml
+
+project_root_path = os.path.realpath(os.path.join(__file__, '..', '..'))
+script_path = os.path.join(project_root_path, 'scripts', 'order-confirmation-mail-parser')
+test_data_path = os.path.join(project_root_path, 'tests', 'data')
+
+def get_mail_path_single_order_platform_a():  # path of the first amazon sample mail
+    return os.path.join(test_data_path, 'amazon', 'mail_1.eml')  # presumably holds exactly one order (per the helper name) - confirm against mail_1.yml
+
+def get_mail_path_single_order_platform_b():  # path of a banggood sample mail, i.e. a second platform besides amazon
+    return os.path.join(test_data_path, 'banggood', '1.eml')  # presumably holds exactly one order (per the helper name) - confirm against 1.yml
+
+def get_mail_path_two_orders():  # path of the second amazon sample mail
+    return os.path.join(test_data_path, 'amazon', 'mail_2.eml')  # presumably holds two orders (per the helper name) - confirm against mail_2.yml
+
+@pytest.mark.parametrize('mail_path', [
+    get_mail_path_single_order_platform_a(),
+    get_mail_path_two_orders(),
+    ])
+def test_stdin(mail_path):  # a mail piped to the script's stdin must yield the orders stored in the sibling .yml
+    with open(mail_path.replace('.eml', '.yml'), 'r') as expected_result_file:
+        expected_orders = yaml.load(expected_result_file.read())
+    script = subprocess.Popen([script_path], stdin = subprocess.PIPE, stdout = subprocess.PIPE)
+    with open(mail_path, 'r') as mail_file:
+        script_stdout, script_stderr = script.communicate(input = mail_file.read())
+    parsed_orders = yaml.load(script_stdout)
+    assert expected_orders == parsed_orders
+
+@pytest.mark.parametrize('mail_paths', [
+    [
+        get_mail_path_single_order_platform_a(),
+        ],
+    [
+        get_mail_path_two_orders(),
+        ],
+    [
+        get_mail_path_single_order_platform_a(),
+        get_mail_path_two_orders(),
+        ],
+    ])
+def test_path(mail_paths):  # mails passed as path arguments must yield the concatenation of their expected orders
+    expected_orders = []
+    for mail_path in mail_paths:
+        with open(mail_path.replace('.eml', '.yml'), 'r') as expected_result_file:
+            expected_orders.extend(yaml.load(expected_result_file.read()))
+    script_stdout = subprocess.check_output([script_path] + mail_paths)
+    assert expected_orders == yaml.load(script_stdout)
+
+@pytest.mark.parametrize('mail_paths', [
+    [
+        get_mail_path_single_order_platform_a(),
+        get_mail_path_single_order_platform_b(),
+        get_mail_path_two_orders(),
+        ],
+    ])
+def test_catalogue(tmpdir, mail_paths):  # --catalogue must leave one directory per platform, holding one entry per order id
+    workdir = tmpdir.strpath  # run the script with cwd = workdir instead of os.chdir(), which would leak into later tests
+    for mail_index, mail_path in enumerate(mail_paths):
+        shutil.copyfile(mail_path, os.path.join(workdir, '%d.eml' % mail_index))
+    assert len(os.listdir(workdir)) == len(mail_paths)
+    orders = yaml.load(subprocess.check_output(
+        [script_path, '--catalogue'] + os.listdir(workdir), cwd = workdir,
+        ))
+    assert set(os.listdir(workdir)) == set([o.platform for o in orders])
+    for dir_name in os.listdir(workdir):
+        assert set(os.listdir(os.path.join(workdir, dir_name))) \
+                == set([o.order_id for o in orders if o.platform == dir_name])
+
+@pytest.mark.parametrize('mail_paths', [
+    [
+        get_mail_path_single_order_platform_a(),
+        ],
+    [
+        get_mail_path_two_orders(),
+        ],
+    [
+        get_mail_path_single_order_platform_a(),
+        get_mail_path_two_orders(),
+        ],
+    ])
+def test_register_create(tmpdir, mail_paths):  # --register on a missing file must create a registry holding exactly the parsed orders
+    registry_path = os.path.join(tmpdir.strpath, 'registry.yml')  # absolute path: no os.chdir() that would leak into later tests
+    subprocess.check_output(
+        [script_path, '--register', registry_path] + mail_paths,
+        )
+    with open(registry_path) as registry_file:
+        registry = yaml.load(registry_file.read())
+    orders = []
+    for mail_path in mail_paths:
+        with open(mail_path) as mail:
+            orders += dingguo.parser.parse_order_confirmation_mail(
+                    email.message_from_file(mail)
+                    )
+    for order in orders:
+        assert order == registry.registry[order.platform][order.order_id]
+    for platform in registry.registry:
+        for order_id in registry.registry[platform]:
+            assert registry.registry[platform][order_id] in orders
+
+@pytest.mark.parametrize('pre_mail_paths,post_mail_paths', [
+    (
+        [get_mail_path_single_order_platform_a()],
+        [get_mail_path_single_order_platform_b()],
+        ),
+    (
+        [get_mail_path_single_order_platform_a(), get_mail_path_single_order_platform_b()],
+        [get_mail_path_two_orders()],
+        ),
+    ])
+def test_register_update(tmpdir, pre_mail_paths, post_mail_paths):  # a second --register run must extend the existing registry file
+    registry_path = os.path.join(tmpdir.strpath, 'registry.yml')  # absolute path: no os.chdir() that would leak into later tests
+    subprocess.check_output(
+        [script_path, '--register', registry_path] + pre_mail_paths,
+        )
+    subprocess.check_output(
+        [script_path, '--register', registry_path] + post_mail_paths,
+        )
+    with open(registry_path) as registry_file:
+        registry = yaml.load(registry_file.read())
+    orders = []
+    for mail_path in (pre_mail_paths + post_mail_paths):
+        with open(mail_path) as mail:
+            orders += dingguo.parser.parse_order_confirmation_mail(
+                    email.message_from_file(mail)
+                    )
+    for order in orders:
+        assert order == registry.registry[order.platform][order.order_id]
+    for platform in registry.registry:
+        for order_id in registry.registry[platform]:
+            assert registry.registry[platform][order_id] in orders

+ 85 - 5
tests/test_yaml.py

@@ -77,22 +77,48 @@ def get_discount_b():
             amount = get_sum_b(),
             )
 
-def get_order_a():
+def get_order_a(items = True, discounts = True):  # sample order; the flags allow building it without items and/or discounts
     order = dingguo.Order(
             platform = u'platformπ',
             order_id = u'id',
             order_date = datetime.datetime(2016, 5, 8, 0, 18, 17),
             customer_id = u'customer',
             )
-    order.items.append(get_item_a())
-    order.items.append(get_item_b())
-    order.discounts.append(get_discount_a())
-    order.discounts.append(get_discount_b())
+    if items:  # default True keeps the two sample items of the previous version
+        order.items.append(get_item_a())
+        order.items.append(get_item_b())
+    if discounts:  # default True keeps the two sample discounts of the previous version
+        order.discounts.append(get_discount_a())
+        order.discounts.append(get_discount_b())
+    return order
+
+def get_order_b():  # sample order on the same platform as order a, built without a customer_id argument
+    placed_at = datetime.datetime(2015, 5, 8, 0, 18, 17)
+    return dingguo.Order(
+            platform = u'platformπ',
+            order_id = u'order_b',
+            order_date = placed_at,
+            )
+
+def get_order_c():  # sample order whose platform, order id and customer id contain non-ASCII (Greek) characters
+    order = dingguo.Order(
+            platform = u'γάμμα',
+            order_id = u'order_βήτα',
+            order_date = datetime.datetime(2014, 5, 8, 0, 18, 17),
+            customer_id = u'ρώ',
+            )
     return order
 
 def get_distance():
     return dingguo.Distance(2.4142, u'km')
 
+def get_order_registry():  # registry holding sample orders a (without items / discounts), b and c
+    sample_registry = dingguo.OrderRegistry()
+    bare_order_a = get_order_a(items = False, discounts = False)
+    for sample_order in (bare_order_a, get_order_b(), get_order_c()):
+        sample_registry.register(sample_order)
+    return sample_registry
+
 def to_yaml(data):
     return yaml.dump(data, default_flow_style = False, allow_unicode = True).decode('utf-8')
 
@@ -322,3 +348,57 @@ name: taxi ride
 price_brutto: !sum '1.23 EUR'
 route_map: null
 """)
+
+def test_order_registry_to_yaml():  # dumping the sample registry must produce exactly the !order-registry document below
+    assert to_yaml(get_order_registry()) == u"""!order-registry
+platformπ:
+  id: !order
+    customer_id: customer
+    discounts: []
+    items: []
+    order_date: 2016-05-08
+    order_id: id
+    platform: platformπ
+  order_b: !order
+    customer_id: null
+    discounts: []
+    items: []
+    order_date: 2015-05-08
+    order_id: order_b
+    platform: platformπ
+γάμμα:
+  order_βήτα: !order
+    customer_id: ρώ
+    discounts: []
+    items: []
+    order_date: 2014-05-08
+    order_id: order_βήτα
+    platform: γάμμα
+"""
+
+def test_order_registry_from_yaml():  # loading the !order-registry document must rebuild a registry equal to the sample one
+    expected = get_order_registry()  # NOTE(review): the document omits 'items' for order 'id' and 'discounts' for 'order_b' - presumably to exercise defaults; confirm
+    loaded = yaml.load(u"""!order-registry
+platformπ:
+  id: !order
+    customer_id: customer
+    discounts: []
+    order_date: 2016-05-08
+    order_id: id
+    platform: platformπ
+  order_b: !order
+    customer_id: null
+    items: []
+    order_date: 2015-05-08
+    order_id: order_b
+    platform: platformπ
+γάμμα:
+  order_βήτα: !order
+    customer_id: ρώ
+    discounts: []
+    items: []
+    order_date: 2014-05-08
+    order_id: order_βήτα
+    platform: γάμμα
+""")
+    assert expected == loaded, yaml_diff(expected, loaded)