order-confirmation-mail-parser 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # PYTHON_ARGCOMPLETE_OK
  4. import dingguo
  5. import re
  6. import os
  7. import sys
  8. import yaml
  9. import email
  10. import pprint
  11. import random
  12. import locale
  13. import argparse
  14. import datetime
  15. import traceback
  16. import subprocess
  17. import HTMLParser
  18. import argcomplete
  19. import BeautifulSoup
  class Order(object):
      """One order placed on a platform, together with its items and discounts."""

      def __init__(self, platform, order_id, order_date, customer_id = None):
          """Validate the arguments and start with empty item and discount lists.

          platform and order_id must be exactly of type unicode; customer_id
          must be None or unicode. order_date must be a datetime.date; a
          datetime.datetime is reduced to its date first. Any violation
          raises AssertionError.
          """
          if type(order_date) is datetime.datetime:
              # only the calendar day of the order is kept
              order_date = order_date.date()
          assert type(platform) is unicode
          assert type(order_id) is unicode
          assert type(order_date) is datetime.date
          assert customer_id is None or type(customer_id) is unicode
          self.platform = platform
          self.order_id = order_id
          self.order_date = order_date
          self.customer_id = customer_id
          self.items = []
          self.discounts = []

      def dict_repr(self):
          """Return the order as a plain dict, leaving out entries whose value is None."""
          candidates = [
              ('articles', self.items),
              ('customer_id', self.customer_id),
              ('discounts', self.discounts),
              ('order_date', self.order_date.strftime('%Y-%m-%d')),
              ('order_id', self.order_id),
              ('platform', self.platform),
          ]
          return dict((key, value) for (key, value) in candidates if value is not None)
  def _represent_order(dumper, order):
      """Dump an Order as the mapping returned by its dict_repr()."""
      return dumper.represent_dict(order.dict_repr())


  yaml.SafeDumper.add_representer(Order, _represent_order)
  class Distance(dingguo.Figure):
      """A dingguo.Figure restricted to float values that can be converted to metres."""

      def __init__(self, value, unit):
          """Reject non-float values (AssertionError), then defer to dingguo.Figure."""
          assert type(value) is float
          super(Distance, self).__init__(value, unit)

      def metres(self):
          """Return the distance in metres.

          Only the unit 'km' is handled; every other unit raises a bare
          Exception. Reads self.unit and self.value, which are presumably set
          by dingguo.Figure.__init__ (not visible here).
          """
          if self.unit == 'km':
              return self.value * 1000
          raise Exception()
  class Sum(object):
      """An amount of money: a float value plus a currency code."""

      def __init__(self, value, currency):
          """Store value and currency after validating both.

          value must be exactly a float. The euro sign is accepted as an alias
          for u'EUR'; after that translation the currency must be the unicode
          string u'EUR', the only currency accepted. Violations raise
          AssertionError.
          """
          assert type(value) is float
          code = u'EUR' if currency == u'€' else currency
          assert type(code) is unicode
          assert code in (u'EUR',)
          self.value = value
          self.currency = code
  class Discount(object):
      """A named, non-negative reduction of an order's price."""

      def __init__(self, name = None, amount = None):
          """Store name and amount.

          Despite the None defaults both arguments are required: name must be
          exactly unicode and amount a Sum whose value is >= 0, otherwise
          AssertionError is raised.
          """
          assert type(name) is unicode
          assert type(amount) is Sum
          assert amount.value >= 0
          self.name = name
          self.amount = amount

      def dict_repr(self):
          """Return the discount as a plain dict with name, value and value_currency."""
          amount = self.amount
          return dict([
              ('name', self.name),
              ('value', amount.value),
              ('value_currency', amount.currency),
          ])
  def _represent_discount(dumper, discount):
      """Dump a Discount as the mapping returned by its dict_repr()."""
      return dumper.represent_dict(discount.dict_repr())


  yaml.SafeDumper.add_representer(Discount, _represent_discount)
  class Item(object):
      """Anything with a name and a gross price that can appear on an order."""

      def __init__(self, name = None, price_brutto = None):
          """Store name and gross price.

          Despite the None defaults both arguments are required: name must be
          exactly unicode and price_brutto a Sum, otherwise AssertionError is
          raised.
          """
          assert type(name) is unicode
          self.name = name
          assert type(price_brutto) is Sum
          self.price_brutto = price_brutto

      def dict_repr(self):
          """Return the item as a plain dict with name, price_brutto and its currency."""
          price = self.price_brutto
          return dict([
              ('name', self.name),
              ('price_brutto', price.value),
              ('price_brutto_currency', price.currency),
          ])
  def _represent_item(dumper, item):
      """Dump an Item as the mapping returned by its dict_repr()."""
      return dumper.represent_dict(item.dict_repr())


  yaml.SafeDumper.add_representer(Item, _represent_item)
  class Article(Item):
      """An Item bought in a given quantity, optionally with authors, state, reseller and shipper."""

      def __init__(
              self,
              quantity = None,
              authors = None,
              state = None,
              reseller = None,
              shipper = None,
              **kwargs
              ):
          """Store the article attributes; remaining keyword arguments go to Item.

          quantity must be exactly an int (required despite the None default).
          authors must be a list; when omitted (or None) every article gets its
          own fresh empty list. state, reseller and shipper must each be None
          or exactly unicode. Violations raise AssertionError.
          delivery_date always starts out as None.
          """
          super(Article, self).__init__(**kwargs)
          assert type(quantity) is int
          self.quantity = quantity
          if authors is None:
              # A literal [] default would be one list object shared by every
              # article created without authors; build a new one per instance.
              authors = []
          assert type(authors) is list
          self.authors = authors
          assert state is None or type(state) is unicode
          self.state = state
          assert reseller is None or type(reseller) is unicode
          self.reseller = reseller
          assert shipper is None or type(shipper) is unicode
          self.shipper = shipper
          self.delivery_date = None

      def dict_repr(self):
          """Return Item.dict_repr() extended by the article attributes.

          The 'authors' key is only present when there is at least one author.
          """
          attr = Item.dict_repr(self)
          attr.update({
              'delivery_date': self.delivery_date,
              'quantity': self.quantity,
              'reseller': self.reseller,
              'shipper': self.shipper,
              'state': self.state,
          })
          if self.authors:
              attr['authors'] = self.authors
          return attr
  def _represent_article(dumper, article):
      """Dump an Article as the mapping returned by its dict_repr()."""
      return dumper.represent_dict(article.dict_repr())


  yaml.SafeDumper.add_representer(Article, _represent_article)
  class Transportation(Item):
      """An Item describing a trip from a departure point to a destination point."""

      def __init__(
              self,
              departure_point = None,
              destination_point = None,
              distance = None,
              route_map = None,
              **kwargs
              ):
          """Store the trip attributes; remaining keyword arguments go to Item.

          departure_point and destination_point must be exactly unicode
          (required despite the None defaults). distance must be None or a
          Distance, route_map None or a str. Violations raise AssertionError.
          """
          super(Transportation, self).__init__(**kwargs)
          assert type(departure_point) is unicode
          assert type(destination_point) is unicode
          assert distance is None or type(distance) is Distance
          assert route_map is None or type(route_map) is str
          self.departure_point = departure_point
          self.destination_point = destination_point
          self.distance = distance
          self.route_map = route_map

      def dict_repr(self):
          """Return Item.dict_repr() extended by the trip attributes.

          'distance_metres' is None when no (truthy) distance is set.
          """
          if self.distance:
              distance_metres = self.distance.metres()
          else:
              distance_metres = None
          attr = Item.dict_repr(self)
          attr['departure_point'] = self.departure_point
          attr['destination_point'] = self.destination_point
          attr['distance_metres'] = distance_metres
          attr['route_map'] = self.route_map
          return attr
  def _represent_transportation(dumper, transportation):
      """Dump a Transportation as the mapping returned by its dict_repr()."""
      return dumper.represent_dict(transportation.dict_repr())


  yaml.SafeDumper.add_representer(Transportation, _represent_transportation)
  class TaxiRide(Transportation):
      """A Transportation carried out by a named driver, with optional timestamps."""

      def __init__(self, name = None, driver = None, arrival_time = None, departure_time = None, **kwargs):
          """Store driver and times; remaining keyword arguments go to Transportation.

          A missing name falls back to u'Taxi Ride'. driver must be exactly
          unicode (required despite the None default); arrival_time and
          departure_time must each be None or a datetime.datetime. Violations
          raise AssertionError.
          """
          effective_name = u'Taxi Ride' if name is None else name
          super(TaxiRide, self).__init__(name = effective_name, **kwargs)
          assert type(driver) is unicode
          assert arrival_time is None or type(arrival_time) is datetime.datetime
          self.driver = driver
          self.arrival_time = arrival_time
          assert departure_time is None or type(departure_time) is datetime.datetime
          self.departure_time = departure_time

      def dict_repr(self):
          """Return Transportation.dict_repr() extended by driver and formatted times.

          Times are rendered as 'YYYY-MM-DD HH:MM'; unset times stay None.
          """
          time_format = '%Y-%m-%d %H:%M'
          arrival = self.arrival_time.strftime(time_format) if self.arrival_time else None
          departure = self.departure_time.strftime(time_format) if self.departure_time else None
          attr = Transportation.dict_repr(self)
          attr['arrival_time'] = arrival
          attr['departure_time'] = departure
          attr['driver'] = self.driver
          return attr
  def _represent_taxi_ride(dumper, taxi_ride):
      """Dump a TaxiRide as the mapping returned by its dict_repr()."""
      return dumper.represent_dict(taxi_ride.dict_repr())


  yaml.SafeDumper.add_representer(TaxiRide, _represent_taxi_ride)
  180. def parse_amazon(msg):
  181. msg_text = msg.get_payload()[0].get_payload(decode = True).decode('utf-8')
  182. if not u'Amazon.de Bestellbestätigung' in msg_text:
  183. raise Exception('no amazon order confirmation')
  184. orders = []
  185. for order_text in re.split(ur'={32,}', msg_text)[1:-1]:
  186. order_id = re.search(r'Bestellnummer #(.+)', order_text).group(1)
  187. order_date_formatted = re.search(ur'Aufgegeben am (.+)', order_text, re.UNICODE).group(1)
  188. locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
  189. order_date = datetime.datetime.strptime(order_date_formatted.encode('utf-8'), '%d. %B %Y')
  190. order = Order(
  191. u'amazon.de',
  192. order_id,
  193. order_date
  194. )
  195. articles_text = order_text.split('Bestellte(r) Artikel:')[1].split('_' * 10)[0].strip()
  196. for article_text in re.split(ur'\n\t*\n', articles_text):
  197. article_match = re.match(
  198. ur' *((?P<quantity>\d+) x )?(?P<name>.*)\n'
  199. + ur'( *von (?P<authors>.*)\n)?'
  200. + ur' *(?P<price_brutto_currency>[A-Z]+) (?P<price_brutto>\d+,\d+)\n'
  201. + ur'( *Zustand: (?P<state>.*)\n)?'
  202. + ur' *Verkauft von: (?P<reseller>.*)'
  203. + ur'(\n *Versand durch (?P<shipper>.*))?',
  204. article_text,
  205. re.MULTILINE | re.UNICODE
  206. )
  207. if article_match is None:
  208. sys.stderr.write(repr(article_text) + '\n')
  209. raise Exception('could not match article')
  210. article = article_match.groupdict()
  211. order.items.append(Article(
  212. name = article['name'],
  213. price_brutto = Sum(
  214. float(article['price_brutto'].replace(',', '.')),
  215. article['price_brutto_currency']
  216. ),
  217. quantity = int(article['quantity']) if article['quantity'] else 1,
  218. authors = article['authors'].split(',') if article['authors'] else [],
  219. state = article['state'],
  220. reseller = article['reseller'],
  221. shipper = article['shipper'],
  222. ))
  223. orders.append(order)
  224. return orders
  225. def parse_oebb(msg):
  226. msg_text = msg.get_payload()[0].get_payload(decode = True).decode('utf8')
  227. # msg_text = re.sub(
  228. # r'<[^>]+>',
  229. # '',
  230. # HTMLParser.HTMLParser().unescape(msg.get_payload(decode = True).decode('utf8'))
  231. # )
  232. order_match = re.search(
  233. ur'Booking code:\s+(?P<order_id>[\d ]+)\s+'
  234. + ur'Customer number:\s+(?P<customer_id>PV\d+)\s+'
  235. + ur'Booking date:\s+(?P<order_date>.* \d{4})\s',
  236. msg_text,
  237. re.MULTILINE | re.UNICODE
  238. )
  239. order_match_groups = order_match.groupdict()
  240. locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
  241. order_date = datetime.datetime.strptime(
  242. order_match_groups['order_date'],
  243. '%b %d, %Y'
  244. )
  245. order = Order(
  246. u'oebb',
  247. order_match_groups['order_id'],
  248. order_date,
  249. customer_id = order_match_groups['customer_id'],
  250. )
  251. item_match = re.search(
  252. ur'(?P<price_brutto_currency>.)(?P<price_brutto>\d+\.\d+)'
  253. + ur'[\W\w]+'
  254. + ur'Your Booking\s+'
  255. + ur'(?P<departure_point>.*)\s+>\s+(?P<destination_point>.*)',
  256. msg_text,
  257. re.MULTILINE | re.UNICODE
  258. )
  259. item = item_match.groupdict()
  260. order.items.append(Transportation(
  261. name = u'Train Ticket',
  262. price_brutto = Sum(
  263. float(item['price_brutto']),
  264. item['price_brutto_currency'],
  265. ),
  266. departure_point = item['departure_point'],
  267. destination_point = item['destination_point'],
  268. ))
  269. return [order]
  270. def parse_mytaxi(msg):
  271. if not 'mytaxi' in msg.get_payload()[0].get_payload()[0].get_payload(decode = True):
  272. raise Exception('no mytaxi mail')
  273. pdf_compressed = msg.get_payload()[1].get_payload(decode = True)
  274. pdftk = subprocess.Popen(
  275. ['pdftk - output - uncompress'],
  276. shell = True,
  277. stdin = subprocess.PIPE,
  278. stdout = subprocess.PIPE,
  279. )
  280. pdf_uncompressed = pdftk.communicate(
  281. input = pdf_compressed,
  282. )[0].decode('latin-1')
  283. assert type(pdf_uncompressed) is unicode
  284. order_match = re.search(
  285. ur'Rechnungsnummer:[^\(]+\((?P<order_id>\w+)\)',
  286. pdf_uncompressed,
  287. re.MULTILINE | re.UNICODE
  288. )
  289. order_id = order_match.groupdict()['order_id']
  290. ride_match_groups = re.search(
  291. ur'\(Bruttobetrag\)'
  292. + ur'[^\(]+'
  293. + ur'\((?P<price_brutto>\d+,\d+) (?P<price_brutto_currency>.+)\)'
  294. + ur'[\w\W]+'
  295. + ur'\((?P<driver>[^\(]+)\)'
  296. + ur'[^\(]+'
  297. + ur'\(\d+,\d+ .\)'
  298. + ur'[^\(]+'
  299. + ur'\((?P<name>Taxifahrt)'
  300. + ur'[^\(]+'
  301. + ur'\(von: (?P<departure_point>[^\)]+)'
  302. + ur'[^\(]+'
  303. + ur'\(nach: (?P<destination_point>[^\)]+)'
  304. + ur'[\w\W]+'
  305. + ur'Belegdatum \\\(Leistungszeitpunkt\\\):[^\(]+\((?P<arrival_time>\d\d.\d\d.\d\d \d\d:\d\d)\)',
  306. pdf_uncompressed,
  307. re.MULTILINE | re.UNICODE
  308. ).groupdict()
  309. arrival_time = datetime.datetime.strptime(
  310. ride_match_groups['arrival_time'],
  311. '%d.%m.%y %H:%M'
  312. )
  313. order = Order(
  314. u'mytaxi',
  315. order_id,
  316. arrival_time,
  317. )
  318. locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
  319. order.items.append(TaxiRide(
  320. price_brutto = Sum(
  321. float(ride_match_groups['price_brutto'].replace(',', '.')),
  322. # why 0x80 ?
  323. u'EUR' if (ride_match_groups['price_brutto_currency'] == u'\x80')
  324. else ride_match_groups['price_brutto_currency'],
  325. ),
  326. departure_point = ride_match_groups['departure_point'],
  327. destination_point = ride_match_groups['destination_point'],
  328. driver = ride_match_groups['driver'],
  329. arrival_time = arrival_time,
  330. ))
  331. return [order]
  332. def parse_uber(msg):
  333. html = msg.get_payload()[0].get_payload(decode = True)
  334. """ document in html2 has the same structure as the one in html.
  335. only difference is that hyperlink urls in html2 have been
  336. replaced by 'email.uber.com/wf/click?upn=.*' urls.
  337. """
  338. html2 = msg.get_payload()[1].get_payload()[0].get_payload(decode = True)
  339. route_map = msg.get_payload()[1].get_payload()[1].get_payload(decode = True)
  340. doc = BeautifulSoup.BeautifulSoup(
  341. html,
  342. convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
  343. )
  344. # strptime
  345. locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
  346. trip_id = re.search(
  347. ur'[\da-f\-]{36}',
  348. doc.find(text = 'Visit the trip page').parent['href'],
  349. ).group(0)
  350. order = Order(
  351. u'uber',
  352. trip_id,
  353. datetime.datetime.strptime(
  354. doc.find(attrs = {'class': 'date'}).text,
  355. '%B %d, %Y',
  356. ),
  357. )
  358. departure_time_tag = doc.find(attrs = {'class': 'from time'})
  359. departure_time = datetime.datetime.strptime(
  360. departure_time_tag.text,
  361. '%I:%M%p',
  362. ).time()
  363. arrival_time_tag = doc.find(attrs = {'class': 'to time'})
  364. arrival_time = datetime.datetime.strptime(
  365. arrival_time_tag.text,
  366. '%I:%M%p',
  367. ).time()
  368. distance = Distance(
  369. float(doc.find(text = 'kilometers').parent.parent.find(attrs = {'class': 'data'}).text),
  370. u'km',
  371. )
  372. fare = doc.find(attrs = {'class': 'header-price'}).find(attrs = {'class': 'header-fare text-pad'}).text
  373. order.items.append(TaxiRide(
  374. name = doc.find(text = 'CAR').parent.parent.find(attrs = {'class': 'data'}).text + ' Ride',
  375. price_brutto = Sum(float(fare[1:]), fare[0]),
  376. arrival_time = datetime.datetime.combine(order.order_date, arrival_time),
  377. departure_time = datetime.datetime.combine(order.order_date, departure_time),
  378. departure_point = departure_time_tag.parent.find(attrs = {'class': 'address'}).text,
  379. destination_point = arrival_time_tag.parent.find(attrs = {'class': 'address'}).text,
  380. distance = distance,
  381. driver = doc.find(attrs = {'class': 'driver-info'}).text[len('You rode with '):],
  382. route_map = route_map,
  383. ))
  384. return [order]
  def parse_yipbee(msg):
      """Extract the order from the plain-text part of a yipbee confirmation mail.

      Reads order id and time, the article lines, an optional discount row
      and the delivery price. Returns a one-element list holding an Order
      whose items are the Articles followed by a 'Delivery' Item.

      Raises Exception when the yipbee thank-you marker is missing, and
      AssertionError when the two printed line totals of an article differ or
      when the parsed articles do not add up to the printed article total.
      Other layout mismatches surface as whatever exception the failing
      lookup raises.
      """
      text = msg.get_payload()[0].get_payload()[0].get_payload(decode = True).decode('utf-8')
      if not u'Vielen Dank für deine Bestellung bei yipbee' in text:
          raise Exception('no yipbee confirmation')
      # Split the mail into header data, the article/discount section (after
      # the GESAMTPREIS heading) and the summary section starting at ARTIKEL.
      order_match_groups = re.search(
          ur'[\W\w]+'
          + ur'BESTELLUNG: (?P<order_id>\w+) vom (?P<order_time>\d\d.\d\d.\d{4} \d\d:\d\d:\d\d)'
          + ur'[\W\w]+'
          + ur'GESAMTPREIS\s+'
          + ur'(?P<articles_and_discount_text>[\W\w]+)'
          + ur'(?P<summary_text>ARTIKEL [\W\w]+)',
          text,
          re.UNICODE
      ).groupdict()
      order = Order(
          u'yipbee',
          order_match_groups['order_id'],
          datetime.datetime.strptime(order_match_groups['order_time'], '%d.%m.%Y %H:%M:%S'),
      )
      # Articles are matched on the section with newlines flattened to spaces:
      # name, a price, a single-digit quantity and a second price, with no
      # separators between the columns.
      for article_match in re.finditer(
          ur'(?P<name>[\w\-\.\:,%\(\) ]+ (Klasse \d|[\w\-\. ]+[^\d ]))'
          + ur'(?P<total_price>\d+,\d\d) €(?P<quantity>\d)(?P<total_price_2>\d+,\d\d) €',
          order_match_groups['articles_and_discount_text'].replace('\n', ' '),
          re.UNICODE,
      ):
          article_match_groups = article_match.groupdict()
          total_price = float(article_match_groups['total_price'].replace(',', '.'))
          total_price_2 = float(article_match_groups['total_price_2'].replace(',', '.'))
          # both printed prices must agree (to within a cent)
          assert abs(total_price - total_price_2) < 0.01, 'expected %f, received %f' % (total_price, total_price_2)
          quantity = int(article_match_groups['quantity'])
          order.items.append(Article(
              name = article_match_groups['name'],
              # the matched price is treated as the line total; store the
              # per-unit price
              price_brutto = Sum(round(total_price / quantity, 2), u'EUR'),
              quantity = quantity,
              reseller = u'yipbee',
              shipper = u'yipbee',
          ))
      # Cross-check: the amount printed between the last ARTIKEL label and
      # RABATTE must equal the sum of the parsed articles.
      articles_price = float(text.split('RABATTE')[0].split('ARTIKEL')[-1].strip().split(' ')[0].replace(',', '.'))
      assert abs(articles_price - sum([a.price_brutto.value * a.quantity for a in order.items])) < 0.01
      # A discount, if any, is looked up as the first <tr> found when the
      # article/discount section is parsed as HTML.
      discount_tag = BeautifulSoup.BeautifulSoup(
          order_match_groups['articles_and_discount_text'],
          convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
      ).find('tr')
      if discount_tag:
          name_tag, value_tag = discount_tag.findAll('td', recursive = False)
          value, currency = value_tag.text.split(' ')
          order.discounts.append(Discount(
              name = name_tag.text,
              # sign is flipped; Discount requires a non-negative amount, so
              # the mail presumably prints the discount as a negative number
              amount = Sum(float(value.replace(',', '.')) * -1, currency),
          ))
      # the delivery price sits between VERSAND and STEUERN in the summary
      delivery_price = order_match_groups['summary_text'].split('VERSAND')[1].split('STEUERN')[0].strip()
      delivery_price_value, delivery_price_currency = delivery_price.split(' ')
      order.items.append(Item(
          name = u'Delivery',
          price_brutto = Sum(float(delivery_price_value.replace(',', '.')), delivery_price_currency),
      ))
      return [order]
  442. def parse_yipbee_html(msg):
  443. html = msg.get_payload()[0].get_payload()[1].get_payload(decode = True)
  444. if not 'yipbee' in html:
  445. raise Exception('no yipbee confirmation')
  446. doc = BeautifulSoup.BeautifulSoup(html, convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES)
  447. content_table = doc.find('table')
  448. order_match_groups = re.search(
  449. ur'Bestellung:(?P<order_id>\w+) vom (?P<order_time>\d\d.\d\d.\d{4} \d\d:\d\d:\d\d)',
  450. content_table.find('table').findAll('tr')[3].text,
  451. re.UNICODE
  452. ).groupdict()
  453. order = Order(
  454. u'yipbee',
  455. order_match_groups['order_id'],
  456. datetime.datetime.strptime(order_match_groups['order_time'], '%d.%m.%Y %H:%M:%S'),
  457. )
  458. articles_table = content_table.find('table').find('tbody').findAll('tr', recursive = False)[4].find('table')
  459. for article_row in articles_table.find('tbody').findAll('tr', recursive = False)[1:]:
  460. article_columns = article_row.findAll('td', recursive = False)
  461. (price, currency) = re.sub(ur'\s+', ' ', article_columns[2].text.replace(u',', u'.')).split(' ')
  462. order.items.append(Article(
  463. name = article_columns[1].text,
  464. price_brutto = Sum(float(price), currency),
  465. quantity = int(article_columns[3].text),
  466. reseller = u'yipbee',
  467. shipper = u'yipbee',
  468. ))
  469. discount_row = content_table.find('table').find('tbody').findAll('tr', recursive = False)[6]
  470. (discount_name, discount_value_with_currency) = [c.text for c in discount_row.findAll('td', recursive = False)]
  471. (discount_value, discount_currency) = discount_value_with_currency.split(' ')
  472. order.discounts.append(Discount(
  473. name = discount_name,
  474. amount = Sum(float(discount_value.replace(',', '.')) * -1, discount_currency)
  475. ))
  476. shipping_costs_table = content_table.find('tbody').findAll('tr', recursive = False)[3].findAll('table')[1]
  477. (shipping_price, shipping_currency) = shipping_costs_table.text.replace(',', '.').split(' ')
  478. order.items.append(Item(
  479. name = u'Delivery',
  480. price_brutto = Sum(float(shipping_price), shipping_currency),
  481. ))
  482. return [order]
  483. def parse_lieferservice(msg):
  484. text = msg.get_payload()[0].get_payload(decode = True).decode('utf-8').replace('\r\n', '\n')
  485. assert type(text) is unicode
  486. if not 'Lieferservice.at' in text:
  487. raise Exception('no lieferservice.at confirmation')
  488. order_match = re.search(
  489. ur'Your order \(.+\) at (?P<restaurant>.*)\s+'
  490. + ur'Your order reference is: (?P<order_id>.*)\s+'
  491. + ur'[\W\w]+'
  492. + ur'Your order\s+'
  493. + ur'(?P<orders_text>[\W\w]+)'
  494. + ur'Delivery costs:\s+(?P<delivery_costs>.*)\s+',
  495. text,
  496. re.UNICODE,
  497. )
  498. order_match_groups = order_match.groupdict()
  499. import time
  500. import email.utils
  501. order_date = datetime.datetime.fromtimestamp(
  502. time.mktime(email.utils.parsedate(msg['Date']))
  503. )
  504. order = Order(
  505. u'lieferservice.at',
  506. order_match_groups['order_id'].strip(),
  507. order_date
  508. )
  509. for article_match in re.finditer(
  510. ur'(?P<quantity>\d+)x\s'
  511. + ur'(?P<name>.*)\s'
  512. + ur'(?P<currency>.) (?P<price>-?\d+,\d+)\s',
  513. order_match_groups['orders_text'],
  514. re.UNICODE,
  515. ):
  516. article_match_groups = article_match.groupdict()
  517. quantity = int(article_match_groups['quantity'])
  518. assert quantity == 1
  519. name = re.sub(ur' +', ' ', article_match_groups['name'])
  520. price = Sum(
  521. float(article_match_groups['price'].replace(',', '.')),
  522. article_match_groups['currency'],
  523. )
  524. if price.value < 0:
  525. price.value *= -1
  526. order.discounts.append(Discount(
  527. name = name,
  528. amount = price,
  529. ))
  530. else:
  531. order.items.append(Article(
  532. name = name,
  533. quantity = 1,
  534. price_brutto = price,
  535. reseller = order_match_groups['restaurant'],
  536. shipper = order_match_groups['restaurant'],
  537. ))
  538. delivery_costs = order_match_groups['delivery_costs'].strip()
  539. assert delivery_costs == 'FREE'
  540. order.items.append(Item(
  541. name = u'Delivery',
  542. price_brutto = Sum(float('0'.replace(',', '.')), u'EUR'),
  543. ))
  544. return [order]
  def parse(msg, parsers = None):
      """Return the orders found in msg by the first parser that accepts it.

      msg is handed to each parser in turn; the result of the first one that
      does not raise is returned unchanged.

      parsers is an optional sequence of (name, callable) pairs. When omitted
      the built-in parsers are tried in this order: amazon, oebb,
      lieferservice, mytaxi, uber, yipbee.

      Raises Exception('failed to parse') when every parser fails; before
      that, the traceback of each attempt is written to stderr in the order
      the parsers were tried.
      """
      if parsers is None:
          parsers = (
              ('amazon', parse_amazon),
              ('oebb', parse_oebb),
              ('lieferservice', parse_lieferservice),
              ('mytaxi', parse_mytaxi),
              ('uber', parse_uber),
              ('yipbee', parse_yipbee),
          )
      failures = []
      for parser_name, parser in parsers:
          try:
              return parser(msg)
          except Exception:
              # A parser rejecting the mail is expected; remember why and move
              # on. KeyboardInterrupt and SystemExit are deliberately not
              # caught so the user can still abort.
              failures.append((parser_name, traceback.format_exc()))
      for parser_name, formatted_traceback in failures:
          sys.stderr.write('%s parser: \n%s\n' % (parser_name, formatted_traceback))
      raise Exception('failed to parse')
  def compute(register_path):
      """Parse the mail on stdin and either print its orders or register them.

      Without register_path the orders are printed to stdout as YAML. With
      register_path the YAML register file is read, each order is stored
      under its platform and order id, and the file is rewritten.

      Raises Exception('already registered') when an order id is already
      present for its platform; the register file is not rewritten in that
      case. Exceptions from parse() propagate.
      """
      msg = email.message_from_string(sys.stdin.read())
      orders = parse(msg)

      if not register_path:
          print(yaml.safe_dump(orders, default_flow_style = False))
          return

      with open(register_path, 'r') as register:
          # NOTE(review): yaml.load can construct arbitrary objects from tagged
          # input; consider yaml.safe_load if the register may be untrusted.
          registered_orders = yaml.load(register.read().decode('utf-8')) or {}

      for order in orders:
          platform_orders = registered_orders.setdefault(order.platform, {})
          if order.order_id in platform_orders:
              raise Exception('already registered')
          platform_orders[order.order_id] = order

      with open(register_path, 'w') as register:
          register.write(yaml.safe_dump(registered_orders, default_flow_style = False))
  592. def _init_argparser():
  593. argparser = argparse.ArgumentParser(description = None)
  594. argparser.add_argument('--register', metavar = 'path', dest = 'register_path')
  595. return argparser
  def main(argv):
      """Parse argv, run compute() with the resulting options and return exit code 0."""
      parser = _init_argparser()
      # must run before parse_args so shell completion requests are answered
      argcomplete.autocomplete(parser)
      options = vars(parser.parse_args(argv))
      compute(**options)
      return 0
  # script entry point: hand the command line (without the program name) to
  # main() and use its return value as the process exit status
  if __name__ == "__main__":
      exit_status = main(sys.argv[1:])
      sys.exit(exit_status)