September 21, 2014

How to Visualise Email Traffic #2

It turns out I was not looking for a histogram in the last visualisation; rather, I wanted a bar graph. I also added functionality to make the png open automatically, and dropped ggplot in favour of matplotlib, which ggplot wraps anyway. That's the high-level summary of changes; here's the new code:

#!~/.virtualenvs/statpy/bin/python
import argparse
import email
import matplotlib.pyplot as plt
import pyimgur
import logging
import pandas as pd
import poplib
import tempfile
import webbrowser

# Position of the hour field in the tuple returned by email.utils.parsedate_tz.
HOUR_INDEX = 3
# Hours are bucketed 0..23, matching the values parsedate_tz yields.
HOURS_PER_DAY = 24

if __name__ == '__main__':
    # Script flow: parse arguments, log in over POP3/SSL, count messages per
    # hour of their Date header, plot a bar chart and upload it to Imgur.
    argparser = argparse.ArgumentParser(description='Which hours does one receive gmail?')
    argparser.add_argument('-s', '--server', action='store', help='POP3 SSL server, defaults to pop.gmail.com', default='pop.gmail.com')
    # The user is mandatory: without it the login below cannot succeed, so let
    # argparse report the problem instead of failing inside poplib.
    argparser.add_argument('-u', '--user', action='store', required=True, help='Username, fully qualified with domain')
    argparser.add_argument('-p', '--password', action='store', help='Password for username, not stored')
    argparser.add_argument('-v', '--verbose', help='Enable debugging', action='store_true')
    arguments = argparser.parse_args()

    if arguments.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.FATAL)

    # Prompt for the password when it was not supplied, so it does not have to
    # be typed on the command line.
    password = arguments.password
    if password is None:
        import getpass
        password = getpass.getpass('Password for {}: '.format(arguments.user))

    # connect to host using SSL
    pop3_mail = poplib.POP3_SSL(arguments.server)
    try:
        # print the response message from server
        print(pop3_mail.getwelcome())

        # send user
        pop3_mail.user(arguments.user)

        # send password
        pop3_mail.pass_(password)
        logging.debug('logged in to mail')
        message_count = pop3_mail.stat()[0]

        logging.debug('All selected')
        # One row per hour; the default integer index (0..23) equals the
        # 'Hour of Day' value, so a parsed hour addresses its own row.
        dates = pd.DataFrame(
            {
                'Hour of Day': list(range(HOURS_PER_DAY)),
                'Number of Messages': [0] * HOURS_PER_DAY,
            },
            columns=['Hour of Day', 'Number of Messages'],
        )
        logging.debug('data frame initalised')
        logging.debug('{} total messages'.format(message_count))
        logging.debug('iterating through messages')
        # POP3 message numbers run from 1 to message_count inclusive.
        for message_number in xrange(1, message_count + 1):
            raw_email = pop3_mail.retr(message_number)[1]
            logging.debug('Email: {}'.format(raw_email))
            message = email.message_from_string('\n'.join(raw_email))
            date_header = message['Date']
            date = email.utils.parsedate_tz(date_header) if date_header else None
            logging.debug('Date: {}'.format(date))
            if date is None:
                # Missing or unparseable Date header: nothing to count.
                logging.debug('skipping message {}: no usable Date header'.format(message_number))
                continue
            # The hour is taken as written in the header; the timezone offset
            # that parsedate_tz also returns is not applied.
            hour = date[HOUR_INDEX]
            logging.debug('Hour: {}'.format(hour))
            if not 0 <= hour < HOURS_PER_DAY:
                logging.debug('skipping message {}: hour out of range'.format(message_number))
                continue
            # Label-based update writes into the frame itself rather than into
            # a row object that may be a copy.
            dates.loc[hour, 'Number of Messages'] += 1
    finally:
        # Always end the POP3 session, even if fetching failed part-way.
        try:
            pop3_mail.quit()
        except (poplib.error_proto, IOError):
            logging.debug('could not close the POP3 session cleanly')

    logging.debug(dates)
    plt.bar(dates['Hour of Day'], dates['Number of Messages'])
    plt.title('Hour of Day for emails to {}'.format(arguments.user))
    plt.xlabel('Hour')
    plt.ylabel('Number of Messages')
    logging.debug('generated plot')
    # The temporary file only needs to live until the upload has finished;
    # the context manager closes (and thereby removes) it afterwards.
    with tempfile.NamedTemporaryFile(prefix='mail', suffix='.png') as out:
        plt.savefig(out.name)
        im = pyimgur.Imgur(u'72a4e1b18bf0d6b')
        uploaded_image = im.upload_image(out.name, title="Uploaded with PyImgur")
    print('Plot available at {}'.format(uploaded_image.link))
    if arguments.verbose:
        webbrowser.open(uploaded_image.link)
Sample output now looks like:
output graphic

No comments:

Post a Comment