# -*- coding: utf-8 -*- import BeautifulSoup import datetime import dingguo import email import ioex import re def parse_order_confirmation_mail(mail): assert isinstance(mail, email.message.Message) html = mail.get_payload()[0].get_payload(decode = True) """ document in html2 has the same structure as the one in html. only difference is that hyperlink urls in html2 have been replaced by 'email.uber.com/wf/click?upn=.*' urls. """ html2 = mail.get_payload()[1].get_payload()[0].get_payload(decode = True) route_map = mail.get_payload()[1].get_payload()[1].get_payload(decode = True) doc = BeautifulSoup.BeautifulSoup( html, convertEntities = BeautifulSoup.BeautifulSoup.HTML_ENTITIES, ) # strptime with ioex.setlocale('en_US.UTF-8'): trip_id = re.search( ur'[\da-f\-]{36}', doc.find(text = 'Visit the trip page').parent['href'], ).group(0) order = dingguo.Order( u'uber', trip_id, datetime.datetime.strptime( doc.find(attrs = {'class': 'date'}).text, '%B %d, %Y', ).date(), ) departure_time_tag = doc.find(attrs = {'class': 'from time'}) departure_time = datetime.datetime.strptime( departure_time_tag.text, '%I:%M%p', ).time() arrival_time_tag = doc.find(attrs = {'class': 'to time'}) arrival_time = datetime.datetime.strptime( arrival_time_tag.text, '%I:%M%p', ).time() distance = dingguo.Distance( float(doc.find(text = 'kilometers').parent.parent.find(attrs = {'class': 'data'}).text), u'km', ) fare = doc.find(attrs = {'class': 'header-price'}).find(attrs = {'class': 'header-fare text-pad'}).text order.items.append(dingguo.TaxiRide( name = doc.find(text = 'CAR').parent.parent.find(attrs = {'class': 'data'}).text + ' Ride', price_brutto = dingguo.Sum(float(fare[1:]), fare[0]), arrival_time = datetime.datetime.combine(order.order_date, arrival_time), departure_time = datetime.datetime.combine(order.order_date, departure_time), departure_point = departure_time_tag.parent.find(attrs = {'class': 'address'}).text, destination_point = arrival_time_tag.parent.find(attrs = {'class': 'address'}).text, distance = distance, driver = doc.find(attrs = {'class': 'driver-info'}).text[len('You rode with '):], route_map = route_map, )) return [order]