September 1, 2014

How to Record Data

I know it's Labor Day and whatnot and I'm supposed to be celebrating the end of Burning Man or be at a barbecue, but I'm not. Instead, I'm tweaking things, which brings me to what I just accomplished -- a Flask-based REST API to data, in Python, naturally:

from backports import lzma
import cStringIO as StringIO
import csv
import datetime
from flask import Flask, request, Response
import json

# TODO force SSL for post --

DATA_FILE = 'sanguine.csv.xz'
app = Flask(__name__)

@app.route('/', methods=['GET'])
def index():
    """Return the whole data set as a JSON array of row objects."""
    with lzma.LZMAFile(DATA_FILE, 'r') as data:
        reader = csv.DictReader(data,
                                fieldnames=['Timestamp', 'User', 'Latitude', 'Longitude'],
                                quoting=csv.QUOTE_MINIMAL, lineterminator='\r\n')
        # Passing fieldnames= means DictReader does NOT treat the first row
        # as a header -- skip it explicitly so it isn't emitted as data.
        next(reader, None)
        return Response(json.dumps(list(reader)), mimetype='application/json')

@app.route('/', methods=['POST'])
def newdatapiece():
    with lzma.LZMAFile(DATA_FILE, 'a') as data:
        writer = csv.DictWriter(data, fieldnames=['Timestamp','User', 'Latitude','Longitude'], quoting = csv.QUOTE_MINIMAL, lineterminator='\r\n')
        row = {}
        row['Timestamp'] ='%s')
        row['User'] = request.form['user_id']
        row['Latitude'] = request.form['lat']
        row['Longitude'] = request.form['lon']
    return '', 201

@app.route('/analyze', methods=['GET'])
def analysis():
    """Return the raw, decompressed CSV file."""
    with lzma.LZMAFile(DATA_FILE, 'r') as data:
        lines = data.readlines()
    # text/csv is the registered media type for CSV (RFC 4180);
    # 'application/csv' is not a real MIME type.
    return Response(lines, mimetype='text/csv')

if __name__ == '__main__':
    # (Re)initialise the data file with just a header row, then serve.
    with lzma.LZMAFile(DATA_FILE, 'w') as data:
        writer = csv.DictWriter(data,
                                fieldnames=['Timestamp', 'User', 'Latitude', 'Longitude'],
                                quoting=csv.QUOTE_MINIMAL, lineterminator='\r\n')
        writer.writeheader()
    # NOTE(review): the original ended in a mangled line fusing writeheader()
    # with what was evidently the call; host string was lost in
    # transcription -- confirm the intended bind address.'', port=8080, debug=True)

August 26, 2014

How to Watch webpages for Changes

Today, I encountered a webpage for which there is no RSS feed, nor an Atom feed. I whipped up something in Python to watch the page myself and report on any changes. Hey, Guido, if you integrate requests, I won't have any non-stdlib requirements in the script. What do you say? Please? Not that the BDFL reads my blog, but anyway, here's the code:

import argparse
import hashlib
import json
import logging
import pprint
import requests
import smtplib

def send_mail(msg, user, password, host='', port=587):
    """Send *msg* from *user* to themselves via authenticated SMTP.

    Port 587 is the mail-submission port, so negotiate TLS before
    presenting credentials -- the original logged in over plaintext.
    The connection is always closed, even if sending fails.

    NOTE(review): the SMTP host string appears to have been lost in
    transcription; it is now a parameter defaulting to the original value.
    """
    server = smtplib.SMTP(host, port)
    try:
        server.ehlo()
        server.starttls()
        server.login(user, password)
        server.sendmail(user, user, msg)
    finally:
        server.quit()

if __name__ == '__main__':
    argparser = argparse.ArgumentParser(description='Check a website for changes')
    argparser.add_argument('-n', '--url', type=str, default=None, help='Add URL to watcher', action='store')
    argparser.add_argument('-l', '--list', action='store_true')
    argparser.add_argument('-u', '--user', type=str, default='', help='Your username', action='store')
    argparser.add_argument('-p', '--password', type=str, help='Your password', action='store')
    argparser.add_argument('-v', '--verbose', action='store_false')
    parsed = argparser.parse_args()

    # -v uses store_false, so parsed.verbose defaults to True: debug logging
    # is ON unless -v is given. That looks inverted from the intent, but it
    # is kept as-is. The basicConfig body was lost in transcription; restored.
    if not parsed.verbose:
        logging.basicConfig(level=logging.DEBUG)

    store_path = '/var/tmp/.globetrekker.txt'

    if parsed.url:
        # Register the new URL with a sentinel hash of 0 ("never fetched").
        # The original opened the store in 'a' (append) mode and then tried
        # to json.load() from it, which cannot work; read, merge, rewrite.
        try:
            with open(store_path, 'r') as fin:
                stored = json.load(fin)
        except (IOError, ValueError):
            stored = {}
        stored[parsed.url] = 0
        with open(store_path, 'w') as fout:
            json.dump(stored, fout)

    with open(store_path, 'r') as fin:
        stored_hash = json.load(fin)

    if parsed.list:
        # Loop body was lost in transcription; listing the watched URLs is
        # the only plausible intent of --list.
        for k in stored_hash:
            print(k)

    for url in stored_hash:
        logging.debug('{} is our URL'.format(url))
        response = requests.get(url)
        logging.debug('page retrieved -- {}'.format(url))
        # Hash the raw response bytes directly. The original round-tripped
        # the body through encode()/decode() and then sha1'd a unicode
        # string, which fails for non-ASCII content on Python 2.
        new_hash = hashlib.sha1(response.content).hexdigest()
        logging.debug('Calculated hash code: {}'.format(new_hash))
        logging.debug('Stored hash: {}'.format(stored_hash[url]))
        if new_hash != stored_hash[url]:
            logging.debug('{} changed'.format(url))
            # 0 is the "never fetched" sentinel: record the first hash
            # silently instead of mailing about a bogus "change".
            if stored_hash[url] != 0:
                send_mail(u'Subject: {} Change detected\r\n\r\n--H'.format(url), parsed.user, parsed.password)
            stored_hash[url] = new_hash

    with open(store_path, 'w') as fout:
        json.dump(stored_hash, fout)

August 23, 2014

How to Sign Text Using Python

An important tenet of any asymmetric encryption system is that a public key must be distributed far and wide -- it is used to encrypt information -- while the corresponding private key must be kept, well, private. The code below shows how to sign some text with your private key in Python:

def sign(message):
    """Clearsign *message* with the user's default GnuPG key.

    Returns the ASCII-armoured clearsigned text -- the original message
    plus its signature -- as a string.
    """
    keyring_dir = '{}/.gnupg'.format(os.path.expanduser('~'))
    signer = gnupg.GPG(gnupghome=keyring_dir)
    signer.encoding = 'utf-8'
    return str(signer.sign(message))
Since python-gnupg can take a list for both public and private keys, both filenames go in as a list. Message is a string of plaintext; the return value's str() method yields the cleartext signature along with the message itself.

How to Set Your User-Agent using PyCurl

An unnamed CDN was blocking my sharing script because it wasn't in its whitelist of approved user agents. And it's a common one. What to do? Fake it, till you make it, to borrow a turn of phrase, like so:

curlObj.setOpt(pycurl.USER_AGENT, 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36')

How to Search for Ports on BSD

BSD systems -- well, at least FreeBSD and OpenBSD -- offer third-party packages through a ports system. The Python script below lets you search for ports by substring:

import argparse
import csv

INDEX = '/usr/ports/INDEX'
if __name__ == '__main__':
    args_ = argparse.ArgumentParser(description='Ports tool for OpenBSD')
    args_.add_argument('query', help='Query', type=unicode, action='store')
    args = args_.parse_args()
    with open(INDEX, 'r') as index:
        reader = csv.reader(index, delimiter='|')
        for line in list(reader):
            if line[0].find(args.query) > 0:
                print line[0]

You need python and to have an index file (modify the path if necessary). The output of this looks like:

% python ./ -q "ruby"

August 19, 2014

How to Synchronise a Syndication Feed to Reddit

The python code below will let you post the newest entry in your blog's syndication feed to Reddit's programming subreddit automatically:

import argparse
import feedparser
import logging
import operator
import praw

if __name__ == '__main__':
    parse = argparse.ArgumentParser(description="Submit a feed's newest entry to reddit")
    parse.add_argument('-f', '--feed', action='store', help='Feed URL', default='')
    parse.add_argument('-p', '--password', action='store', help='Reddit password')
    parse.add_argument('-u', '--user', action='store', help='Reddit Username')
    parse.add_argument('-v', '--verbose', action='store_true', help='Verbose debugging')
    args = parse.parse_args()

    if args.verbose:

    feed = feedparser.parse(args.feed)
    entries = feed.entries
    entries = sorted(entries, key=operator.itemgetter('published'))

    entry = entries[0]
    submission_title = entry.title
    submission_link =

    r = praw.Reddit(user_agent='example')

    r.login(args.user, args.password)
    logging.debug('logged in to reddit as {}'.format(args.user))
    sr = r.get_subreddit('programming')
    sr.submit(submission_title, url=submission_link)

August 18, 2014

How to be your Own CNBC Analyst

We were having a cheeky discussion on lily today about how "You could probably be recognised as a market analyst if you just reported "Dow <rises/falls> on <top headline from>" at the end of the day." I decided to put this to the test, and automate the talking heads out of a job by mining a random headline from the New York Times and the latest NASDAQ quote:

import cStringIO as StringIO
import csv
import feedparser
import logging
import random
import requests

if __name__ == '__main__':
    logging.basicConfig(level=logging.FATAL)

    # Fetch the index quotes as CSV: row 1 is today's data, row 2 is
    # yesterday's, and column 3 holds the closing price.
    response = requests.get('')
    quote_buffer = StringIO.StringIO(response.content)
    rows = list(csv.reader(quote_buffer))
    todays_close = float(rows[1][3])
    yesterdays_close = float(rows[2][3])

    # Grab the top headline from the news feed to serve as our "analysis".
    feed = feedparser.parse('')
    top_story = feed.entries[0]

    change = todays_close - yesterdays_close
    headline = top_story.title
    print('NYSE change {} because of {}'.format(change, headline))
python ~/.virtualenvs/marketAnalyst/
NYSE change 0.05 because of Can the U.S. Still Be a Leader in the Middle East?