order-confirmation-mail-parser 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # PYTHON_ARGCOMPLETE_OK
  4. import re
  5. import os
  6. import sys
  7. import yaml
  8. import email
  9. import pprint
  10. import random
  11. import locale
  12. import argparse
  13. import datetime
  14. import traceback
  15. import subprocess
  16. import HTMLParser
  17. import argcomplete
  18. import BeautifulSoup
  19. class Order(object):
  20. def __init__(self, platform, order_id, order_date, customer_id = None):
  21. assert type(platform) is unicode
  22. self.platform = platform
  23. assert type(order_id) is unicode
  24. self.order_id = order_id
  25. if type(order_date) is datetime.datetime:
  26. order_date = order_date.date()
  27. assert type(order_date) is datetime.date
  28. self.order_date = order_date
  29. assert customer_id is None or type(customer_id) is unicode
  30. self.customer_id = customer_id
  31. self.items = []
  32. self.discounts = []
  33. def dict_repr(self):
  34. return {k: v for (k, v) in {
  35. 'articles': self.items,
  36. 'customer_id': self.customer_id,
  37. 'discounts': self.discounts,
  38. 'order_date': self.order_date.strftime('%Y-%m-%d'),
  39. 'order_id': self.order_id,
  40. 'platform': self.platform,
  41. }.items() if v is not None}
  42. yaml.SafeDumper.add_representer(Order, lambda dumper, order: dumper.represent_dict(order.dict_repr()))
  43. class Figure(object):
  44. def __init__(self, value, unit):
  45. self.value = value
  46. assert type(unit) is unicode
  47. self.unit = unit
  48. class Distance(Figure):
  49. def __init__(self, value, unit):
  50. assert type(value) is float
  51. super(Distance, self).__init__(value, unit)
  52. def metres(self):
  53. if self.unit == 'km':
  54. return self.value * 1000
  55. else:
  56. raise Exception()
  57. class Sum(object):
  58. def __init__(self, value, currency):
  59. assert type(value) is float
  60. self.value = value
  61. if currency == u'€':
  62. currency = u'EUR'
  63. assert type(currency) is unicode
  64. assert currency in [u'EUR']
  65. self.currency = currency
  66. class Discount(object):
  67. def __init__(
  68. self,
  69. name = None,
  70. amount = None,
  71. ):
  72. assert type(name) is unicode
  73. self.name = name
  74. assert type(amount) is Sum
  75. assert amount.value >= 0
  76. self.amount = amount
  77. def dict_repr(self):
  78. return {
  79. 'name': self.name,
  80. 'value': self.amount.value,
  81. 'value_currency': self.amount.currency,
  82. }
  83. yaml.SafeDumper.add_representer(Discount, lambda dumper, discount: dumper.represent_dict(discount.dict_repr()))
  84. class Item(object):
  85. def __init__(
  86. self,
  87. name = None,
  88. price_brutto = None,
  89. ):
  90. assert type(name) is unicode
  91. self.name = name
  92. assert type(price_brutto) is Sum
  93. self.price_brutto = price_brutto
  94. def dict_repr(self):
  95. return {
  96. 'name': self.name,
  97. 'price_brutto': self.price_brutto.value,
  98. 'price_brutto_currency': self.price_brutto.currency,
  99. }
  100. yaml.SafeDumper.add_representer(Item, lambda dumper, item: dumper.represent_dict(item.dict_repr()))
  101. class Article(Item):
  102. def __init__(
  103. self,
  104. quantity = None,
  105. authors = [],
  106. state = None,
  107. reseller = None,
  108. shipper = None,
  109. **kwargs
  110. ):
  111. super(Article, self).__init__(**kwargs)
  112. assert type(quantity) is int
  113. self.quantity = quantity
  114. assert type(authors) is list
  115. self.authors = authors
  116. assert state is None or type(state) is unicode
  117. self.state = state
  118. assert reseller is None or type(reseller) is unicode
  119. self.reseller = reseller
  120. assert shipper is None or type(shipper) is unicode
  121. self.shipper = shipper
  122. self.delivery_date = None
  123. def dict_repr(self):
  124. attr = Item.dict_repr(self)
  125. attr.update({
  126. 'delivery_date': self.delivery_date,
  127. 'quantity': self.quantity,
  128. 'reseller': self.reseller,
  129. 'shipper': self.shipper,
  130. 'state': self.state,
  131. })
  132. if len(self.authors) > 0:
  133. attr['authors'] = self.authors
  134. return attr
  135. yaml.SafeDumper.add_representer(Article, lambda dumper, article: dumper.represent_dict(article.dict_repr()))
  136. class Transportation(Item):
  137. def __init__(
  138. self,
  139. departure_point = None,
  140. destination_point = None,
  141. distance = None,
  142. route_map = None,
  143. **kwargs
  144. ):
  145. super(Transportation, self).__init__(**kwargs)
  146. assert type(departure_point) is unicode
  147. self.departure_point = departure_point
  148. assert type(destination_point) is unicode
  149. self.destination_point = destination_point
  150. assert distance is None or type(distance) is Distance
  151. self.distance = distance
  152. assert route_map is None or type(route_map) is str
  153. self.route_map = route_map
  154. def dict_repr(self):
  155. attr = Item.dict_repr(self)
  156. attr.update({
  157. 'departure_point': self.departure_point,
  158. 'destination_point': self.destination_point,
  159. 'distance_metres': self.distance.metres() if self.distance else None,
  160. 'route_map': self.route_map,
  161. })
  162. return attr
  163. yaml.SafeDumper.add_representer(Transportation, lambda dumper, transportation: dumper.represent_dict(transportation.dict_repr()))
  164. class TaxiRide(Transportation):
  165. def __init__(self, name = None, driver = None, arrival_time = None, departure_time = None, **kwargs):
  166. if name is None:
  167. name = u'Taxi Ride'
  168. super(TaxiRide, self).__init__(name = name, **kwargs)
  169. assert type(driver) is unicode
  170. self.driver = driver
  171. assert arrival_time is None or type(arrival_time) is datetime.datetime
  172. self.arrival_time = arrival_time
  173. assert departure_time is None or type(departure_time) is datetime.datetime
  174. self.departure_time = departure_time
  175. def dict_repr(self):
  176. attr = Transportation.dict_repr(self)
  177. attr.update({
  178. 'arrival_time': self.arrival_time.strftime('%Y-%m-%d %H:%M') if self.arrival_time else None,
  179. 'departure_time': self.departure_time.strftime('%Y-%m-%d %H:%M') if self.departure_time else None,
  180. 'driver': self.driver,
  181. })
  182. return attr
  183. yaml.SafeDumper.add_representer(TaxiRide, lambda dumper, taxi_ride: dumper.represent_dict(taxi_ride.dict_repr()))
  184. def parse_amazon(msg):
  185. msg_text = msg.get_payload()[0].get_payload(decode = True).decode('utf-8')
  186. if not u'Amazon.de Bestellbestätigung' in msg_text:
  187. raise Exception('no amazon order confirmation')
  188. orders = []
  189. for order_text in re.split(ur'={32,}', msg_text)[1:-1]:
  190. order_id = re.search(r'Bestellnummer #(.+)', order_text).group(1)
  191. order_date_formatted = re.search(ur'Aufgegeben am (.+)', order_text, re.UNICODE).group(1)
  192. locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
  193. order_date = datetime.datetime.strptime(order_date_formatted.encode('utf-8'), '%d. %B %Y')
  194. order = Order(
  195. u'amazon.de',
  196. order_id,
  197. order_date
  198. )
  199. articles_text = order_text.split('Bestellte(r) Artikel:')[1].split('_' * 10)[0].strip()
  200. for article_text in re.split(ur'\n\t*\n', articles_text):
  201. article_match = re.match(
  202. ur' *((?P<quantity>\d+) x )?(?P<name>.*)\n'
  203. + ur'( *von (?P<authors>.*)\n)?'
  204. + ur' *(?P<price_brutto_currency>[A-Z]+) (?P<price_brutto>\d+,\d+)\n'
  205. + ur'( *Zustand: (?P<state>.*)\n)?'
  206. + ur' *Verkauft von: (?P<reseller>.*)'
  207. + ur'(\n *Versand durch (?P<shipper>.*))?',
  208. article_text,
  209. re.MULTILINE | re.UNICODE
  210. )
  211. if article_match is None:
  212. sys.stderr.write(repr(article_text) + '\n')
  213. raise Exception('could not match article')
  214. article = article_match.groupdict()
  215. order.items.append(Article(
  216. name = article['name'],
  217. price_brutto = Sum(
  218. float(article['price_brutto'].replace(',', '.')),
  219. article['price_brutto_currency']
  220. ),
  221. quantity = int(article['quantity']) if article['quantity'] else 1,
  222. authors = article['authors'].split(',') if article['authors'] else [],
  223. state = article['state'],
  224. reseller = article['reseller'],
  225. shipper = article['shipper'],
  226. ))
  227. orders.append(order)
  228. return orders
  229. def parse_oebb(msg):
  230. msg_text = msg.get_payload()[0].get_payload(decode = True).decode('utf8')
  231. # msg_text = re.sub(
  232. # r'<[^>]+>',
  233. # '',
  234. # HTMLParser.HTMLParser().unescape(msg.get_payload(decode = True).decode('utf8'))
  235. # )
  236. order_match = re.search(
  237. ur'Booking code:\s+(?P<order_id>[\d ]+)\s+'
  238. + ur'Customer number:\s+(?P<customer_id>PV\d+)\s+'
  239. + ur'Booking date:\s+(?P<order_date>.* \d{4})\s',
  240. msg_text,
  241. re.MULTILINE | re.UNICODE
  242. )
  243. order_match_groups = order_match.groupdict()
  244. locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
  245. order_date = datetime.datetime.strptime(
  246. order_match_groups['order_date'],
  247. '%b %d, %Y'
  248. )
  249. order = Order(
  250. u'oebb',
  251. order_match_groups['order_id'],
  252. order_date,
  253. customer_id = order_match_groups['customer_id'],
  254. )
  255. item_match = re.search(
  256. ur'(?P<price_brutto_currency>.)(?P<price_brutto>\d+\.\d+)'
  257. + ur'[\W\w]+'
  258. + ur'Your Booking\s+'
  259. + ur'(?P<departure_point>.*)\s+>\s+(?P<destination_point>.*)',
  260. msg_text,
  261. re.MULTILINE | re.UNICODE
  262. )
  263. item = item_match.groupdict()
  264. order.items.append(Transportation(
  265. name = u'Train Ticket',
  266. price_brutto = Sum(
  267. float(item['price_brutto']),
  268. item['price_brutto_currency'],
  269. ),
  270. departure_point = item['departure_point'],
  271. destination_point = item['destination_point'],
  272. ))
  273. return [order]
  274. def parse_mytaxi(msg):
  275. if not 'mytaxi' in msg.get_payload()[0].get_payload()[0].get_payload(decode = True):
  276. raise Exception('no mytaxi mail')
  277. pdf_compressed = msg.get_payload()[1].get_payload(decode = True)
  278. pdftk = subprocess.Popen(
  279. ['pdftk - output - uncompress'],
  280. shell = True,
  281. stdin = subprocess.PIPE,
  282. stdout = subprocess.PIPE,
  283. )
  284. pdf_uncompressed = pdftk.communicate(
  285. input = pdf_compressed,
  286. )[0].decode('latin-1')
  287. assert type(pdf_uncompressed) is unicode
  288. order_match = re.search(
  289. ur'Rechnungsnummer:[^\(]+\((?P<order_id>\w+)\)',
  290. pdf_uncompressed,
  291. re.MULTILINE | re.UNICODE
  292. )
  293. order_id = order_match.groupdict()['order_id']
  294. ride_match_groups = re.search(
  295. ur'\(Bruttobetrag\)'
  296. + ur'[^\(]+'
  297. + ur'\((?P<price_brutto>\d+,\d+) (?P<price_brutto_currency>.+)\)'
  298. + ur'[\w\W]+'
  299. + ur'\((?P<driver>[^\(]+)\)'
  300. + ur'[^\(]+'
  301. + ur'\(\d+,\d+ .\)'
  302. + ur'[^\(]+'
  303. + ur'\((?P<name>Taxifahrt)'
  304. + ur'[^\(]+'
  305. + ur'\(von: (?P<departure_point>[^\)]+)'
  306. + ur'[^\(]+'
  307. + ur'\(nach: (?P<destination_point>[^\)]+)'
  308. + ur'[\w\W]+'
  309. + ur'Belegdatum \\\(Leistungszeitpunkt\\\):[^\(]+\((?P<arrival_time>\d\d.\d\d.\d\d \d\d:\d\d)\)',
  310. pdf_uncompressed,
  311. re.MULTILINE | re.UNICODE
  312. ).groupdict()
  313. arrival_time = datetime.datetime.strptime(
  314. ride_match_groups['arrival_time'],
  315. '%d.%m.%y %H:%M'
  316. )
  317. order = Order(
  318. u'mytaxi',
  319. order_id,
  320. arrival_time,
  321. )
  322. locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
  323. order.items.append(TaxiRide(
  324. price_brutto = Sum(
  325. float(ride_match_groups['price_brutto'].replace(',', '.')),
  326. # why 0x80 ?
  327. u'EUR' if (ride_match_groups['price_brutto_currency'] == u'\x80')
  328. else ride_match_groups['price_brutto_currency'],
  329. ),
  330. departure_point = ride_match_groups['departure_point'],
  331. destination_point = ride_match_groups['destination_point'],
  332. driver = ride_match_groups['driver'],
  333. arrival_time = arrival_time,
  334. ))
  335. return [order]
  336. def parse_uber(msg):
  337. html = msg.get_payload()[0].get_payload(decode = True)
  338. """ document in html2 has the same structure as the one in html.
  339. only difference is that hyperlink urls in html2 have been
  340. replaced by 'email.uber.com/wf/click?upn=.*' urls.
  341. """
  342. html2 = msg.get_payload()[1].get_payload()[0].get_payload(decode = True)
  343. route_map = msg.get_payload()[1].get_payload()[1].get_payload(decode = True)
  344. doc = BeautifulSoup.BeautifulSoup(
  345. html,
  346. convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
  347. )
  348. # strptime
  349. locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
  350. trip_id = re.search(
  351. ur'[\da-f\-]{36}',
  352. doc.find(text = 'Visit the trip page').parent['href'],
  353. ).group(0)
  354. order = Order(
  355. u'uber',
  356. trip_id,
  357. datetime.datetime.strptime(
  358. doc.find(attrs = {'class': 'date'}).text,
  359. '%B %d, %Y',
  360. ),
  361. )
  362. departure_time_tag = doc.find(attrs = {'class': 'from time'})
  363. departure_time = datetime.datetime.strptime(
  364. departure_time_tag.text,
  365. '%I:%M%p',
  366. ).time()
  367. arrival_time_tag = doc.find(attrs = {'class': 'to time'})
  368. arrival_time = datetime.datetime.strptime(
  369. arrival_time_tag.text,
  370. '%I:%M%p',
  371. ).time()
  372. distance = Distance(
  373. float(doc.find(text = 'kilometers').parent.parent.find(attrs = {'class': 'data'}).text),
  374. u'km',
  375. )
  376. fare = doc.find(attrs = {'class': 'header-price'}).find(attrs = {'class': 'header-fare text-pad'}).text
  377. order.items.append(TaxiRide(
  378. name = doc.find(text = 'CAR').parent.parent.find(attrs = {'class': 'data'}).text + ' Ride',
  379. price_brutto = Sum(float(fare[1:]), fare[0]),
  380. arrival_time = datetime.datetime.combine(order.order_date, arrival_time),
  381. departure_time = datetime.datetime.combine(order.order_date, departure_time),
  382. departure_point = departure_time_tag.parent.find(attrs = {'class': 'address'}).text,
  383. destination_point = arrival_time_tag.parent.find(attrs = {'class': 'address'}).text,
  384. distance = distance,
  385. driver = doc.find(attrs = {'class': 'driver-info'}).text[len('You rode with '):],
  386. route_map = route_map,
  387. ))
  388. return [order]
  389. def parse_yipbee(msg):
  390. text = msg.get_payload()[0].get_payload()[0].get_payload(decode = True).decode('utf-8')
  391. if not u'Vielen Dank für deine Bestellung bei yipbee' in text:
  392. raise Exception('no yipbee confirmation')
  393. order_match_groups = re.search(
  394. ur'[\W\w]+'
  395. + ur'BESTELLUNG: (?P<order_id>\w+) vom (?P<order_time>\d\d.\d\d.\d{4} \d\d:\d\d:\d\d)'
  396. + ur'[\W\w]+'
  397. + ur'GESAMTPREIS\s+'
  398. + ur'(?P<articles_and_discount_text>[\W\w]+)'
  399. + ur'(?P<summary_text>ARTIKEL [\W\w]+)',
  400. text,
  401. re.UNICODE
  402. ).groupdict()
  403. order = Order(
  404. u'yipbee',
  405. order_match_groups['order_id'],
  406. datetime.datetime.strptime(order_match_groups['order_time'], '%d.%m.%Y %H:%M:%S'),
  407. )
  408. for article_match in re.finditer(
  409. ur'(?P<name>[\w\-\.\:,%\(\) ]+ (Klasse \d|[\w\-\. ]+[^\d ]))'
  410. + ur'(?P<total_price>\d+,\d\d) €(?P<quantity>\d)(?P<total_price_2>\d+,\d\d) €',
  411. order_match_groups['articles_and_discount_text'].replace('\n', ' '),
  412. re.UNICODE,
  413. ):
  414. article_match_groups = article_match.groupdict()
  415. total_price = float(article_match_groups['total_price'].replace(',', '.'))
  416. total_price_2 = float(article_match_groups['total_price_2'].replace(',', '.'))
  417. assert abs(total_price - total_price_2) < 0.01, 'expected %f, received %f' % (total_price, total_price_2)
  418. quantity = int(article_match_groups['quantity'])
  419. order.items.append(Article(
  420. name = article_match_groups['name'],
  421. price_brutto = Sum(round(total_price / quantity, 2), u'EUR'),
  422. quantity = quantity,
  423. reseller = u'yipbee',
  424. shipper = u'yipbee',
  425. ))
  426. articles_price = float(text.split('RABATTE')[0].split('ARTIKEL')[-1].strip().split(' ')[0].replace(',', '.'))
  427. assert abs(articles_price - sum([a.price_brutto.value * a.quantity for a in order.items])) < 0.01
  428. discount_tag = BeautifulSoup.BeautifulSoup(
  429. order_match_groups['articles_and_discount_text'],
  430. convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
  431. ).find('tr')
  432. if discount_tag:
  433. name_tag, value_tag = discount_tag.findAll('td', recursive = False)
  434. value, currency = value_tag.text.split(' ')
  435. order.discounts.append(Discount(
  436. name = name_tag.text,
  437. amount = Sum(float(value.replace(',', '.')) * -1, currency),
  438. ))
  439. delivery_price = order_match_groups['summary_text'].split('VERSAND')[1].split('STEUERN')[0].strip()
  440. delivery_price_value, delivery_price_currency = delivery_price.split(' ')
  441. order.items.append(Item(
  442. name = u'Delivery',
  443. price_brutto = Sum(float(delivery_price_value.replace(',', '.')), delivery_price_currency),
  444. ))
  445. return [order]
  446. def parse_yipbee_html(msg):
  447. html = msg.get_payload()[0].get_payload()[1].get_payload(decode = True)
  448. if not 'yipbee' in html:
  449. raise Exception('no yipbee confirmation')
  450. doc = BeautifulSoup.BeautifulSoup(html, convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES)
  451. content_table = doc.find('table')
  452. order_match_groups = re.search(
  453. ur'Bestellung:(?P<order_id>\w+) vom (?P<order_time>\d\d.\d\d.\d{4} \d\d:\d\d:\d\d)',
  454. content_table.find('table').findAll('tr')[3].text,
  455. re.UNICODE
  456. ).groupdict()
  457. order = Order(
  458. u'yipbee',
  459. order_match_groups['order_id'],
  460. datetime.datetime.strptime(order_match_groups['order_time'], '%d.%m.%Y %H:%M:%S'),
  461. )
  462. articles_table = content_table.find('table').find('tbody').findAll('tr', recursive = False)[4].find('table')
  463. for article_row in articles_table.find('tbody').findAll('tr', recursive = False)[1:]:
  464. article_columns = article_row.findAll('td', recursive = False)
  465. (price, currency) = re.sub(ur'\s+', ' ', article_columns[2].text.replace(u',', u'.')).split(' ')
  466. order.items.append(Article(
  467. name = article_columns[1].text,
  468. price_brutto = Sum(float(price), currency),
  469. quantity = int(article_columns[3].text),
  470. reseller = u'yipbee',
  471. shipper = u'yipbee',
  472. ))
  473. discount_row = content_table.find('table').find('tbody').findAll('tr', recursive = False)[6]
  474. (discount_name, discount_value_with_currency) = [c.text for c in discount_row.findAll('td', recursive = False)]
  475. (discount_value, discount_currency) = discount_value_with_currency.split(' ')
  476. order.discounts.append(Discount(
  477. name = discount_name,
  478. amount = Sum(float(discount_value.replace(',', '.')) * -1, discount_currency)
  479. ))
  480. shipping_costs_table = content_table.find('tbody').findAll('tr', recursive = False)[3].findAll('table')[1]
  481. (shipping_price, shipping_currency) = shipping_costs_table.text.replace(',', '.').split(' ')
  482. order.items.append(Item(
  483. name = u'Delivery',
  484. price_brutto = Sum(float(shipping_price), shipping_currency),
  485. ))
  486. return [order]
  487. def parse_lieferservice(msg):
  488. text = msg.get_payload()[0].get_payload(decode = True).decode('utf-8').replace('\r\n', '\n')
  489. assert type(text) is unicode
  490. if not 'Lieferservice.at' in text:
  491. raise Exception('no lieferservice.at confirmation')
  492. order_match = re.search(
  493. ur'Your order \(.+\) at (?P<restaurant>.*)\s+'
  494. + ur'Your order reference is: (?P<order_id>.*)\s+'
  495. + ur'[\W\w]+'
  496. + ur'Your order\s+'
  497. + ur'(?P<orders_text>[\W\w]+)'
  498. + ur'Delivery costs:\s+(?P<delivery_costs>.*)\s+',
  499. text,
  500. re.UNICODE,
  501. )
  502. order_match_groups = order_match.groupdict()
  503. import time
  504. import email.utils
  505. order_date = datetime.datetime.fromtimestamp(
  506. time.mktime(email.utils.parsedate(msg['Date']))
  507. )
  508. order = Order(
  509. u'lieferservice.at',
  510. order_match_groups['order_id'].strip(),
  511. order_date
  512. )
  513. for article_match in re.finditer(
  514. ur'(?P<quantity>\d+)x\s'
  515. + ur'(?P<name>.*)\s'
  516. + ur'(?P<currency>.) (?P<price>-?\d+,\d+)\s',
  517. order_match_groups['orders_text'],
  518. re.UNICODE,
  519. ):
  520. article_match_groups = article_match.groupdict()
  521. quantity = int(article_match_groups['quantity'])
  522. assert quantity == 1
  523. name = re.sub(ur' +', ' ', article_match_groups['name'])
  524. price = Sum(
  525. float(article_match_groups['price'].replace(',', '.')),
  526. article_match_groups['currency'],
  527. )
  528. if price.value < 0:
  529. price.value *= -1
  530. order.discounts.append(Discount(
  531. name = name,
  532. amount = price,
  533. ))
  534. else:
  535. order.items.append(Article(
  536. name = name,
  537. quantity = 1,
  538. price_brutto = price,
  539. reseller = order_match_groups['restaurant'],
  540. shipper = order_match_groups['restaurant'],
  541. ))
  542. delivery_costs = order_match_groups['delivery_costs'].strip()
  543. assert delivery_costs == 'FREE'
  544. order.items.append(Item(
  545. name = u'Delivery',
  546. price_brutto = Sum(float('0'.replace(',', '.')), u'EUR'),
  547. ))
  548. return [order]
  549. def parse(msg):
  550. tracebacks = {}
  551. try:
  552. return parse_amazon(msg)
  553. except:
  554. tracebacks['amazon'] = traceback.format_exc()
  555. try:
  556. return parse_oebb(msg)
  557. except:
  558. tracebacks['oebb'] = traceback.format_exc()
  559. try:
  560. return parse_lieferservice(msg)
  561. except:
  562. tracebacks['lieferservice'] = traceback.format_exc()
  563. try:
  564. return parse_mytaxi(msg)
  565. except:
  566. tracebacks['mytaxi'] = traceback.format_exc()
  567. try:
  568. return parse_uber(msg)
  569. except:
  570. tracebacks['uber'] = traceback.format_exc()
  571. try:
  572. return parse_yipbee(msg)
  573. except:
  574. tracebacks['yipbee'] = traceback.format_exc()
  575. for parser_name in tracebacks:
  576. sys.stderr.write('%s parser: \n%s\n' % (parser_name, tracebacks[parser_name]))
  577. raise Exception('failed to parse')
  578. def compute(register_path):
  579. msg = email.message_from_string(sys.stdin.read())
  580. orders = parse(msg)
  581. if register_path:
  582. with open(register_path, 'r') as register:
  583. registered_orders = yaml.load(register.read().decode('utf-8'))
  584. if not registered_orders:
  585. registered_orders = {}
  586. for order in orders:
  587. if order.platform not in registered_orders:
  588. registered_orders[order.platform] = {}
  589. if order.order_id in registered_orders[order.platform]:
  590. raise Exception('already registered')
  591. registered_orders[order.platform][order.order_id] = order
  592. with open(register_path, 'w') as register:
  593. register.write(yaml.safe_dump(registered_orders, default_flow_style = False))
  594. else:
  595. print(yaml.safe_dump(orders, default_flow_style = False))
  596. def _init_argparser():
  597. argparser = argparse.ArgumentParser(description = None)
  598. argparser.add_argument('--register', metavar = 'path', dest = 'register_path')
  599. return argparser
  600. def main(argv):
  601. argparser = _init_argparser()
  602. argcomplete.autocomplete(argparser)
  603. args = argparser.parse_args(argv)
  604. compute(**vars(args))
  605. return 0
  606. if __name__ == "__main__":
  607. sys.exit(main(sys.argv[1:]))