Tonight I went back to some earlier data wanking I'd done on my email, namely "what hour do I receive the most email?", except this time there weren't two scripts -- the entire program is 57 lines of code, much of which is logging (I like to see what's going on, OK?):
#!python
import argparse
import email
import ggplot
import pyimgur
import logging
import os
import pandas as pd
import poplib
import tempfile
HOURS_PER_DAY = 24


def hour_of_message(lines):
    """Return the hour (0-23) from the ``Date`` header of one raw message.

    ``lines`` is the list of message lines found in element ``[1]`` of the
    tuple returned by ``poplib.POP3.retr()`` (``str`` on Python 2, ``bytes``
    on Python 3).  The hour is taken exactly as written in the header; no
    timezone conversion is applied.

    Returns ``None`` when the header is missing, cannot be parsed, or carries
    an hour outside 0-23, so the caller can skip the message instead of
    crashing on it.
    """
    # Imported explicitly: ``import email`` alone does not guarantee that the
    # ``email.utils`` submodule has been loaded.
    from email.utils import parsedate_tz

    raw = b'\n'.join(lines)
    # Python 3 hands back bytes and needs message_from_bytes; Python 2 only
    # has message_from_string, where str and bytes are the same type.
    parse = getattr(email, 'message_from_bytes', email.message_from_string)
    header = parse(raw)['Date']
    if header is None:
        return None
    # str() because Python 3 may return a Header object for non-ASCII data.
    parsed = parsedate_tz(str(header))
    if parsed is None:
        return None
    hour = parsed[3]
    if not 0 <= hour < HOURS_PER_DAY:
        return None
    return hour


def count_messages_by_hour(hours):
    """Tally an iterable of hours into a 24-element list indexed by hour.

    ``None`` entries (messages without a usable date) are ignored.
    """
    counts = [0] * HOURS_PER_DAY
    for hour in hours:
        if hour is not None:
            counts[hour] += 1
    return counts


def main():
    """Count a POP3 mailbox's messages per hour, plot them, upload the plot."""
    import getpass

    argparser = argparse.ArgumentParser(description='Which hours does one receive gmail?')
    argparser.add_argument('-s', '--server', action='store', help='POP3 SSL server, defaults to pop.gmail.com', default='pop.gmail.com')
    # Required: without it the original sent the literal "None" as the user.
    argparser.add_argument('-u', '--user', action='store', required=True, help='Username, fully qualified with domain')
    argparser.add_argument('-p', '--password', action='store', help='Password for username, not stored')
    argparser.add_argument('-v', '--verbose', help='Enable debugging', action='store_true')
    arguments = argparser.parse_args()
    logging.basicConfig(level=logging.DEBUG if arguments.verbose else logging.FATAL)

    # Prompt when -p is omitted so the password need not appear on the
    # command line (and in the process list / shell history).
    password = arguments.password
    if password is None:
        password = getpass.getpass('Password for {}: '.format(arguments.user))

    # connect to host using SSL
    # NOTE(review): as in the original, the session is never ended with
    # QUIT.  QUIT moves the server into the POP3 UPDATE state; confirm the
    # server-side effect on the mailbox before adding it.
    pop3_mail = poplib.POP3_SSL(arguments.server)
    # print the response message from server
    print(pop3_mail.getwelcome())
    pop3_mail.user(arguments.user)
    pop3_mail.pass_(password)
    logging.debug('logged in to mail')

    message_count = pop3_mail.stat()[0]
    logging.debug('All selected')
    logging.debug('{} total messages'.format(message_count))
    logging.debug('iterating through messages')
    hours = []
    # POP3 message numbers run from 1 to message_count inclusive.
    for message_number in range(1, message_count + 1):
        raw_email = pop3_mail.retr(message_number)[1]
        logging.debug('Email: %s', raw_email)
        hour = hour_of_message(raw_email)
        logging.debug('Hour: %s', hour)
        if hour is None:
            logging.debug('message %s has no usable Date header, skipped', message_number)
        hours.append(hour)

    # Build the frame once from plain counts instead of mutating it row by
    # row; hours are labelled 0-23 to match the values being counted.
    dates = pd.DataFrame(
        {
            'Hour of Day': list(range(HOURS_PER_DAY)),
            'Number of Messages': count_messages_by_hour(hours),
        },
        columns=['Hour of Day', 'Number of Messages'],
    )
    logging.debug('data frame initalised')
    logging.debug(dates)

    # NOTE(review): only 'Number of Messages' is mapped to x, exactly as in
    # the original.  If hours were meant to be on the x axis the aesthetic
    # needs changing -- confirm against the ggplot package before touching.
    plot = ggplot.ggplot(dates, ggplot.aes(x='Number of Messages')) + ggplot.geom_bar() + ggplot.ggtitle('Hour of Day for emails to {}'.format(arguments.user)) + ggplot.xlab('Message Count / hour') + ggplot.theme_seaborn()
    logging.debug('generated plot')

    # The context manager removes the temporary image once it is uploaded.
    with tempfile.NamedTemporaryFile(prefix='mail', suffix='.png') as out:
        ggplot.ggsave(filename=out.name, plot=plot)
        im = pyimgur.Imgur(u'72a4e1b18bf0d6b')
        uploaded_image = im.upload_image(out.name, title="Uploaded with PyImgur")
    print('Plot available at {}'.format(uploaded_image.link))


if __name__ == '__main__':
    main()
A sample run results in the following image:
No comments:
Post a Comment