|
@@ -13,6 +13,7 @@ import locale
|
|
|
import argparse
|
|
|
import datetime
|
|
|
import traceback
|
|
|
+import subprocess
|
|
|
import HTMLParser
|
|
|
import argcomplete
|
|
|
|
|
@@ -25,7 +26,7 @@ def parse_amazon(msg):
|
|
|
'platform': 'amazon.de',
|
|
|
}
|
|
|
|
|
|
- msg_text = msg.get_payload(decode = True)
|
|
|
+ msg_text = msg.get_payload()[0].get_payload(decode = True)
|
|
|
|
|
|
order['order_id'] = re.search(r'Bestellnummer #(.+)', msg_text).group(1)
|
|
|
|
|
@@ -63,11 +64,13 @@ def parse_amazon(msg):
|
|
|
|
|
|
def parse_oebb(msg):
|
|
|
|
|
|
- msg_text = re.sub(
|
|
|
- r'<[^>]+>',
|
|
|
- '',
|
|
|
- HTMLParser.HTMLParser().unescape(msg.get_payload(decode = True).decode('utf8'))
|
|
|
- )
|
|
|
+ msg_text = msg.get_payload()[0].get_payload(decode = True).decode('utf8')
|
|
|
+
|
|
|
+ # msg_text = re.sub(
|
|
|
+ # r'<[^>]+>',
|
|
|
+ # '',
|
|
|
+ # HTMLParser.HTMLParser().unescape(msg.get_payload(decode = True).decode('utf8'))
|
|
|
+ # )
|
|
|
|
|
|
order_match = re.search(
|
|
|
ur'Booking code:\s+(?P<order_id>[\d ]+)\s+'
|
|
@@ -103,6 +106,64 @@ def parse_oebb(msg):
|
|
|
|
|
|
return order
|
|
|
|
|
|
+def parse_mytaxi(msg):
|
|
|
+
|
|
|
+ pdf_compressed = msg.get_payload()[1].get_payload(decode = True)
|
|
|
+ pdftk = subprocess.Popen(
|
|
|
+ ['pdftk - output - uncompress'],
|
|
|
+ shell = True,
|
|
|
+ stdin = subprocess.PIPE,
|
|
|
+ stdout = subprocess.PIPE,
|
|
|
+ )
|
|
|
+ pdf_uncompressed = pdftk.communicate(
|
|
|
+ input = pdf_compressed,
|
|
|
+ )[0].decode('latin-1')
|
|
|
+ assert type(pdf_uncompressed) is unicode
|
|
|
+
|
|
|
+ order_match = re.search(
|
|
|
+ ur'Rechnungsnummer:[^\(]+\((?P<order_id>\w+)\)',
|
|
|
+ pdf_uncompressed,
|
|
|
+ re.MULTILINE | re.UNICODE
|
|
|
+ )
|
|
|
+ order = order_match.groupdict()
|
|
|
+ order['platform'] = 'mytaxi'
|
|
|
+
|
|
|
+ article_match = re.search(
|
|
|
+ ur'\(Bruttobetrag\)'
|
|
|
+ + ur'[^\(]+'
|
|
|
+ + ur'\((?P<price_brutto>\d+,\d+) (?P<price_brutto_currency>.+)\)'
|
|
|
+ + ur'[\w\W]+'
|
|
|
+ + ur'\((?P<driver>[^\(]+)\)'
|
|
|
+ + ur'[^\(]+'
|
|
|
+ + ur'\(\d+,\d+ .\)'
|
|
|
+ + ur'[^\(]+'
|
|
|
+ + ur'\((?P<name>Taxifahrt)'
|
|
|
+ + ur'[^\(]+'
|
|
|
+ + ur'\(von: (?P<departure_point>[^\)]+)'
|
|
|
+ + ur'[^\(]+'
|
|
|
+ + ur'\(nach: (?P<destination_point>[^\)]+)'
|
|
|
+ + ur'[\w\W]+'
|
|
|
+ + ur'Belegdatum \\\(Leistungszeitpunkt\\\):[^\(]+\((?P<arrival_time>\d\d.\d\d.\d\d \d\d:\d\d)\)',
|
|
|
+ pdf_uncompressed,
|
|
|
+ re.MULTILINE | re.UNICODE
|
|
|
+ )
|
|
|
+ article = article_match.groupdict()
|
|
|
+ locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
|
|
|
+ arrival_time = datetime.datetime.strptime(
|
|
|
+ article['arrival_time'],
|
|
|
+ '%d.%m.%y %H:%M'
|
|
|
+ )
|
|
|
+ article['arrival_time'] = arrival_time.strftime('%Y-%m-%d %H:%M')
|
|
|
+ order['order_date'] = arrival_time.strftime('%Y-%m-%d')
|
|
|
+ article['price_brutto'] = float(article['price_brutto'].replace(',', '.'))
|
|
|
+ if article['price_brutto_currency'] in [u'€', u'\x80']:
|
|
|
+ article['price_brutto_currency'] = 'EUR'
|
|
|
+ else:
|
|
|
+ raise exception('currency %s is not supported' % article['price_brutto_currency'])
|
|
|
+ order['articles'] = [article]
|
|
|
+
|
|
|
+ return order
|
|
|
+
|
|
|
def parse(msg):
|
|
|
|
|
|
tracebacks = {}
|
|
@@ -117,25 +178,23 @@ def parse(msg):
|
|
|
except:
|
|
|
tracebacks['oebb'] = traceback.format_exc()
|
|
|
|
|
|
+ try:
|
|
|
+ return parse_mytaxi(msg)
|
|
|
+ except:
|
|
|
+ tracebacks['mytaxi'] = traceback.format_exc()
|
|
|
+
|
|
|
for parser_name in tracebacks:
|
|
|
print('%s parser: \n%s' % (parser_name, tracebacks[parser_name]))
|
|
|
|
|
|
- print('failed')
|
|
|
- # raise Exception('failed to parse')
|
|
|
+ raise Exception('failed to parse')
|
|
|
|
|
|
def compute():
|
|
|
|
|
|
msg = email.message_from_string(sys.stdin.read())
|
|
|
|
|
|
- orders = []
|
|
|
-
|
|
|
- if msg.is_multipart():
|
|
|
- for part in msg.get_payload():
|
|
|
- orders.append(parse(part))
|
|
|
- else:
|
|
|
- orders.append(parse(msg))
|
|
|
+ order = parse(msg)
|
|
|
|
|
|
- print(yaml.safe_dump(orders, default_flow_style = False))
|
|
|
+ print(yaml.safe_dump(order, default_flow_style = False))
|
|
|
|
|
|
def _init_argparser():
|
|
|
|