Browse Source

support mytaxi invoices

Fabian Peter Hammerle 9 years ago
parent
commit
c2db3603b1
1 changed file with 75 additions and 16 deletions
  1. 75 16
      order-confirmation-mail-parser

+ 75 - 16
order-confirmation-mail-parser

@@ -13,6 +13,7 @@ import locale
 import argparse
 import datetime
 import traceback
+import subprocess
 import HTMLParser
 import argcomplete
 
@@ -25,7 +26,7 @@ def parse_amazon(msg):
         'platform': 'amazon.de',
         }
 
-    msg_text = msg.get_payload(decode = True)
+    msg_text = msg.get_payload()[0].get_payload(decode = True)
 
     order['order_id'] = re.search(r'Bestellnummer #(.+)', msg_text).group(1)
 
@@ -63,11 +64,13 @@ def parse_amazon(msg):
 
 def parse_oebb(msg):
 
-    msg_text = re.sub(
-        r'<[^>]+>',
-        '',
-        HTMLParser.HTMLParser().unescape(msg.get_payload(decode = True).decode('utf8'))
-        )
+    msg_text = msg.get_payload()[0].get_payload(decode = True).decode('utf8')
+
+    # msg_text = re.sub(
+    #     r'<[^>]+>',
+    #     '',
+    #     HTMLParser.HTMLParser().unescape(msg.get_payload(decode = True).decode('utf8'))
+    #     )
 
     order_match = re.search(
         ur'Booking code:\s+(?P<order_id>[\d ]+)\s+'
@@ -103,6 +106,64 @@ def parse_oebb(msg):
 
     return order
 
+def parse_mytaxi(msg):
+    """Parse a mytaxi invoice mail into an order dict.
+
+    The invoice data is taken from the PDF carried in the second MIME
+    part of msg: the PDF is uncompressed with pdftk and the resulting
+    stream text is searched with regular expressions.
+
+    Returns a dict with the keys 'order_id', 'platform', 'order_date'
+    and 'articles' (a one-element list describing the taxi ride).
+
+    Raises AttributeError when one of the patterns does not match and
+    Exception when the invoice currency is not recognised as euro.
+    """
+
+    pdf_compressed = msg.get_payload()[1].get_payload(decode = True)
+    # The command is fixed, so pass it as an argument list and avoid
+    # spawning an intermediate shell.
+    pdftk = subprocess.Popen(
+            ['pdftk', '-', 'output', '-', 'uncompress'],
+            stdin = subprocess.PIPE,
+            stdout = subprocess.PIPE,
+            )
+    # latin-1 maps every byte to a code point, so decoding cannot fail.
+    pdf_uncompressed = pdftk.communicate(
+        input = pdf_compressed,
+        )[0].decode('latin-1')
+    assert type(pdf_uncompressed) is unicode
+
+    # Text strings in the uncompressed PDF are enclosed in parentheses;
+    # the patterns below capture the content of those groups.
+    order_match = re.search(
+        ur'Rechnungsnummer:[^\(]+\((?P<order_id>\w+)\)',
+        pdf_uncompressed,
+        re.MULTILINE | re.UNICODE
+        )
+    order = order_match.groupdict()
+    order['platform'] = 'mytaxi'
+
+    article_match = re.search(
+        ur'\(Bruttobetrag\)'
+            + ur'[^\(]+'
+            + ur'\((?P<price_brutto>\d+,\d+) (?P<price_brutto_currency>.+)\)'
+            + ur'[\w\W]+'
+            + ur'\((?P<driver>[^\(]+)\)'
+            + ur'[^\(]+'
+            + ur'\(\d+,\d+ .\)'
+            + ur'[^\(]+'
+            + ur'\((?P<name>Taxifahrt)'
+            + ur'[^\(]+'
+            + ur'\(von: (?P<departure_point>[^\)]+)'
+            + ur'[^\(]+'
+            + ur'\(nach: (?P<destination_point>[^\)]+)'
+            + ur'[\w\W]+'
+            + ur'Belegdatum \\\(Leistungszeitpunkt\\\):[^\(]+\((?P<arrival_time>\d\d.\d\d.\d\d \d\d:\d\d)\)',
+        pdf_uncompressed,
+        re.MULTILINE | re.UNICODE
+        )
+    article = article_match.groupdict()
+    locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
+    arrival_time = datetime.datetime.strptime(
+        article['arrival_time'],
+        '%d.%m.%y %H:%M'
+        )
+    article['arrival_time'] = arrival_time.strftime('%Y-%m-%d %H:%M')
+    # The invoice date doubles as the order date.
+    order['order_date'] = arrival_time.strftime('%Y-%m-%d')
+    # Prices use a decimal comma, e.g. '12,50'.
+    article['price_brutto'] = float(article['price_brutto'].replace(',', '.'))
+    # u'\x80' is presumably the euro sign stored as a Windows-1252 byte,
+    # which the latin-1 decoding above leaves as code point 0x80.
+    if article['price_brutto_currency'] in [u'€', u'\x80']:
+        article['price_brutto_currency'] = 'EUR'
+    else:
+        raise Exception('currency %s is not supported' % article['price_brutto_currency'])
+    order['articles'] = [article]
+
+    return order
+
+
 def parse(msg):
 
     tracebacks = {}
@@ -117,25 +178,23 @@ def parse(msg):
     except:
         tracebacks['oebb'] = traceback.format_exc()
 
+    try:
+        return parse_mytaxi(msg)
+    except:
+        tracebacks['mytaxi'] = traceback.format_exc()
+
     for parser_name in tracebacks:
         print('%s parser: \n%s' % (parser_name, tracebacks[parser_name]))
 
-    print('failed')
-    # raise Exception('failed to parse')
+    raise Exception('failed to parse')
 
 def compute():
 
     msg = email.message_from_string(sys.stdin.read())
 
-    orders = []
-
-    if msg.is_multipart():
-        for part in msg.get_payload():
-            orders.append(parse(part))
-    else:
-        orders.append(parse(msg))
+    order = parse(msg)
 
-    print(yaml.safe_dump(orders, default_flow_style = False))
+    print(yaml.safe_dump(order, default_flow_style = False))
 
 def _init_argparser():